Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions lib/muninn/searcher.ex
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ defmodule Muninn.Searcher do
* `{:ok, results}` - Search results with total_hits and hits
* `{:error, reason}` - Search or parse failed

> Note: `total_hits` is the number of hits returned in this response — capped by
> `:limit` and equal to `length(results["hits"])`, not the total number of
> matching documents. Use `count/3` for the full match count regardless of limit.

## Examples

# Search for "elixir" in title and content fields
Expand Down Expand Up @@ -184,6 +188,10 @@ defmodule Muninn.Searcher do
* `{:ok, results}` - Search results with snippets
* `{:error, reason}` - Search or parse failed

> Note: `total_hits` is the number of hits returned in this response — capped by
> `:limit` and equal to `length(results["hits"])`, not the total number of
> matching documents. Use `count/3` for the full match count regardless of limit.

Result format includes an additional `"snippets"` map with HTML-highlighted snippets:

%{
Expand Down Expand Up @@ -275,6 +283,10 @@ defmodule Muninn.Searcher do
* `{:ok, results}` - Search results with total_hits and hits
* `{:error, reason}` - Search failed

> Note: `total_hits` is the number of hits returned in this response — capped by
> `:limit` and equal to `length(results["hits"])`, not the total number of
> matching documents. Use `count/3` for the full match count regardless of limit.

## Examples

# Search for titles starting with "eli"
Expand Down
76 changes: 76 additions & 0 deletions test/muninn/prefix_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
defmodule Muninn.PrefixTest do
  # Exercises `Muninn.Searcher.search_prefix/4` against small throwaway indexes.
  use ExUnit.Case, async: true

  alias Muninn.{Index, IndexWriter, IndexReader, Searcher, Schema}

  # Every test gets its own uniquely-named index directory, so the async tests
  # never write to the same path. Cleanup is delegated to
  # `Muninn.TestHelpers.safe_rm_rf/1` when the test exits.
  setup do
    test_path = "/tmp/muninn_prefix_#{:erlang.unique_integer([:positive])}"
    on_exit(fn -> Muninn.TestHelpers.safe_rm_rf(test_path) end)
    {:ok, test_path: test_path}
  end

  # Creates an index at `test_path` with a single stored + indexed "title" text
  # field, adds and commits `docs`, and returns a searcher opened on that index.
  defp searcher_for(test_path, docs) do
    schema = Schema.new() |> Schema.add_text_field("title", stored: true, indexed: true)
    {:ok, index} = Index.create(test_path, schema)
    Enum.each(docs, &IndexWriter.add_document(index, &1))
    IndexWriter.commit(index)
    {:ok, reader} = IndexReader.new(index)
    {:ok, searcher} = Searcher.new(reader)
    searcher
  end

  describe "search_prefix/4" do
    test "matches documents whose term starts with the prefix", %{test_path: test_path} do
      searcher =
        searcher_for(test_path, [
          %{"title" => "Phoenix Framework"},
          %{"title" => "Photography Tips"},
          %{"title" => "Elixir Guide"}
        ])

      assert {:ok, results} = Searcher.search_prefix(searcher, "title", "pho", limit: 10)

      assert results["total_hits"] == 2
      titles = Enum.map(results["hits"], & &1["doc"]["title"])
      assert "Phoenix Framework" in titles
      assert "Photography Tips" in titles
      refute "Elixir Guide" in titles
    end

    test "narrows results as the prefix grows (typeahead)", %{test_path: test_path} do
      searcher =
        searcher_for(test_path, [
          %{"title" => "program"},
          %{"title" => "programming"},
          %{"title" => "progress"},
          %{"title" => "project"}
        ])

      assert {:ok, r1} = Searcher.search_prefix(searcher, "title", "pro", limit: 10)
      assert {:ok, r2} = Searcher.search_prefix(searcher, "title", "progr", limit: 10)
      assert {:ok, r3} = Searcher.search_prefix(searcher, "title", "program", limit: 10)

      assert r1["total_hits"] == 4
      assert r2["total_hits"] == 3
      assert r3["total_hits"] == 2
    end

    test "respects the limit option", %{test_path: test_path} do
      docs = for i <- 1..10, do: %{"title" => "testitem#{i}"}
      searcher = searcher_for(test_path, docs)

      assert {:ok, results} =
               Searcher.search_prefix(searcher, "title", "testitem", limit: 5)

      # All 10 documents match the prefix, so a limit of 5 must yield exactly 5
      # hits. A `<= 5` check would also pass if the search returned nothing.
      assert length(results["hits"]) == 5

      # total_hits is documented as the number of hits returned (capped by limit).
      assert results["total_hits"] == 5
    end

    test "returns no hits for a non-matching prefix", %{test_path: test_path} do
      searcher = searcher_for(test_path, [%{"title" => "Elixir"}, %{"title" => "Phoenix"}])

      assert {:ok, results} = Searcher.search_prefix(searcher, "title", "zzz", limit: 10)

      assert results["total_hits"] == 0
      assert results["hits"] == []
    end
  end
end
115 changes: 115 additions & 0 deletions test/muninn/snippets_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
defmodule Muninn.SnippetsTest do
  # Covers `Muninn.Searcher.search_with_snippets/5`: snippet shape, highlighting,
  # multiple snippet fields, snippet truncation and the `:limit` option.
  use ExUnit.Case, async: true

  alias Muninn.{Index, IndexWriter, IndexReader, Searcher, Schema}

  # A fresh, uniquely-named index directory per test; cleanup is handed to
  # `Muninn.TestHelpers.safe_rm_rf/1` on exit.
  setup do
    dir = "/tmp/muninn_snippets_#{:erlang.unique_integer([:positive])}"
    on_exit(fn -> Muninn.TestHelpers.safe_rm_rf(dir) end)
    {:ok, test_path: dir}
  end

  # Builds a schema with one stored + indexed text field per name, in the given order.
  defp text_schema(field_names) do
    Enum.reduce(field_names, Schema.new(), fn name, schema ->
      Schema.add_text_field(schema, name, stored: true, indexed: true)
    end)
  end

  # Creates the index at `test_path`, writes and commits `docs`, and returns a
  # searcher opened on it.
  defp searcher_for(test_path, schema, docs) do
    {:ok, index} = Index.create(test_path, schema)

    Enum.each(docs, fn doc -> IndexWriter.add_document(index, doc) end)
    IndexWriter.commit(index)

    {:ok, reader} = IndexReader.new(index)
    {:ok, searcher} = Searcher.new(reader)
    searcher
  end

  describe "search_with_snippets/5" do
    test "returns a snippet map for the requested field", %{test_path: test_path} do
      docs = [
        %{"title" => "Elixir Guide", "content" => "learn elixir programming with this guide"}
      ]

      searcher = searcher_for(test_path, text_schema(["title", "content"]), docs)

      {:ok, results} =
        Searcher.search_with_snippets(searcher, "elixir", ["title", "content"], ["content"])

      assert results["total_hits"] == 1

      first_hit = List.first(results["hits"])
      assert is_map(first_hit["snippets"])
      assert Map.has_key?(first_hit["snippets"], "content")
      assert is_binary(first_hit["snippets"]["content"])
    end

    test "highlights the matched term with <b> tags", %{test_path: test_path} do
      docs = [%{"content" => "learn elixir programming today"}]
      searcher = searcher_for(test_path, text_schema(["content"]), docs)

      {:ok, results} =
        Searcher.search_with_snippets(searcher, "elixir", ["content"], ["content"])

      first_hit = List.first(results["hits"])
      highlighted = get_in(first_hit, ["snippets", "content"])

      assert highlighted =~ "<b>elixir</b>"
    end

    test "supports multiple snippet fields", %{test_path: test_path} do
      fields = ["title", "content"]
      docs = [%{"title" => "elixir basics", "content" => "an elixir tutorial"}]
      searcher = searcher_for(test_path, text_schema(fields), docs)

      {:ok, results} = Searcher.search_with_snippets(searcher, "elixir", fields, fields)

      by_field = Map.get(List.first(results["hits"]), "snippets")

      assert by_field["title"] =~ "<b>elixir</b>"
      assert by_field["content"] =~ "<b>elixir</b>"
    end

    test "max_snippet_chars truncates long content", %{test_path: test_path} do
      # The search term sits in the middle of a long run of filler text.
      leading = String.duplicate("padding words here ", 40)
      trailing = String.duplicate("more padding text ", 40)
      long = leading <> "elixir " <> trailing

      searcher = searcher_for(test_path, text_schema(["content"]), [%{"content" => long}])

      opts = [max_snippet_chars: 40]

      {:ok, results} =
        Searcher.search_with_snippets(searcher, "elixir", ["content"], ["content"], opts)

      first_hit = List.first(results["hits"])
      snippet = get_in(first_hit, ["snippets", "content"])

      # Compare lengths without the highlight markup.
      stripped = Regex.replace(~r{</?b>}, snippet, "")

      assert snippet =~ "<b>elixir</b>"
      assert String.length(stripped) < String.length(long)
    end

    test "respects the limit option", %{test_path: test_path} do
      docs = Enum.map(1..5, fn i -> %{"content" => "elixir document number #{i}"} end)
      searcher = searcher_for(test_path, text_schema(["content"]), docs)

      {:ok, results} =
        Searcher.search_with_snippets(searcher, "elixir", ["content"], ["content"], limit: 2)

      # total_hits counts the hits actually returned, so it is capped by limit too.
      assert results["total_hits"] == 2
      assert length(results["hits"]) == 2
    end
  end
end
Loading