berrypod/lib/simpleshop_theme/search.ex
jamey edcbc596e3 add LIKE substring fallback to search and update plan statuses
FTS5 prefix matching misses mid-word substrings (e.g. "ebook" in
"notebook"). When FTS5 returns zero results, fall back to LIKE
query on title and category with proper wildcard escaping. 4 new
tests, 757 total.

Also marks completed plan files (search, admin-redesign,
setup-wizard, products-context) with correct status.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 09:09:10 +00:00

219 lines
5.6 KiB
Elixir

defmodule SimpleshopTheme.Search do
@moduledoc """
Full-text product search backed by SQLite FTS5.
Uses a contentless FTS5 index with BM25 ranking. The index is rebuilt
from the products table after each provider sync.
"""
import Ecto.Query
alias SimpleshopTheme.Products.Product
alias SimpleshopTheme.Repo
@listing_preloads [images: :image]
# BM25 column weights: title(10), category(5), variant_info(3), description(1)
@bm25_weights "10.0, 5.0, 3.0, 1.0"
@doc """
Runs a product search for `query`.

The query is trimmed first; anything shorter than two characters yields `[]`.
Otherwise the FTS5 index is consulted, and only when it produces no results
is the `LIKE` substring search tried. Non-binary input always yields `[]`.

Returns a list of `Product` structs with listing preloads.
"""
def search(query) when is_binary(query) do
  trimmed = String.trim(query)

  if String.length(trimmed) >= 2 do
    # An empty FTS result falls through to the substring search; any
    # non-empty result is returned as-is.
    with [] <- trimmed |> build_fts_query() |> search_fts() do
      search_like(trimmed)
    end
  else
    []
  end
end

def search(_other), do: []
@doc """
Wipes the search index and repopulates it from scratch.

Both index tables are emptied, then every product that is visible and has
status `"active"` is inserted again. Everything runs inside a single
`Repo.transaction/1`, whose result is returned.
"""
def rebuild_index do
  Repo.transaction(fn ->
    # Empty the rowid mapping table and the FTS table before refilling them.
    Repo.query!("DELETE FROM products_search_map")
    Repo.query!("DELETE FROM products_search")

    # Variants are preloaded because their titles/options are indexed too.
    indexable =
      from(p in Product,
        where: p.visible == true and p.status == "active",
        preload: [:variants]
      )

    indexable
    |> Repo.all()
    |> Enum.each(&insert_into_index/1)
  end)
end
@doc """
Adds a single product to the search index, replacing any entry it already has.

Variants are force-preloaded before indexing. The removal of the old entry
and the insertion of the new one happen inside one `Repo.transaction/1`,
whose result is returned.
"""
def index_product(%Product{} = product) do
  with_variants = Repo.preload(product, [:variants], force: true)

  Repo.transaction(fn ->
    remove_from_index(with_variants.id)
    insert_into_index(with_variants)
  end)
end
@doc """
Deletes the search index entry for the product with the given id.

Does nothing (and returns `:ok`) when the product has no index entry.
"""
def remove_product(product_id), do: remove_from_index(product_id)
# Builds an FTS5 MATCH expression from raw user input.
#
# Every character that is not a word character or whitespace is replaced by a
# space, the remainder is split on whitespace, and each token is wrapped in
# double quotes so FTS5 reads it as a literal string rather than query syntax.
# The last token additionally gets a trailing `*` (prefix match) so a
# partially typed word still matches. Returns nil when no tokens remain.
#
# Both regexes carry the `u` modifier so `\w` and `\s` are Unicode-aware;
# without it non-ASCII letters were deleted ("café" became "caf").
# Punctuation becomes a space instead of being removed so that "t-shirt"
# produces the tokens "t" and "shirt" rather than the fused "tshirt" —
# presumably this mirrors how the index tokenizer splits on punctuation
# (TODO confirm against the FTS5 table definition).
defp build_fts_query(input) do
  tokens =
    input
    |> String.replace(~r/[^\w\s]/u, " ")
    |> String.split(~r/\s+/u, trim: true)

  case Enum.split(tokens, -1) do
    {_, []} ->
      nil

    {complete, [last]} ->
      quoted = Enum.map(complete, fn token -> ~s("#{token}") end)
      Enum.join(quoted ++ [~s("#{last}" *)], " ")
  end
end
# Runs the MATCH query against the FTS5 table and returns the matching
# products ordered by BM25 rank. A nil query (no usable tokens) short-circuits
# to an empty list.
defp search_fts(nil), do: []

defp search_fts(fts_query) do
  sql = """
  SELECT m.product_id, bm25(products_search, #{@bm25_weights}) AS rank
  FROM products_search
  JOIN products_search_map m ON m.rowid = products_search.rowid
  WHERE products_search MATCH ?1
  ORDER BY rank
  LIMIT 20
  """

  %{rows: rows} = Repo.query!(sql, [fts_query])

  case Enum.map(rows, fn [product_id, _rank] -> product_id end) do
    [] ->
      []

    ranked_ids ->
      # Load the full structs in one query, keyed by id. Products that are no
      # longer visible/active are filtered out here even if still indexed.
      lookup =
        from(p in Product,
          where: p.id in ^ranked_ids,
          where: p.visible == true and p.status == "active",
          preload: ^@listing_preloads
        )
        |> Repo.all()
        |> Map.new(fn product -> {product.id, product} end)

      # Walk the ids in rank order so the SQL ordering is preserved; ids with
      # no loaded product are skipped.
      for id <- ranked_ids, {:ok, product} <- [Map.fetch(lookup, id)], do: product
  end
end
# Substring fallback used when the FTS5 search returns nothing.
#
# Matches the query anywhere inside the title or category of visible, active
# products. Returns at most 20 products ordered by title, with listing
# preloads.
#
# SQLite's LIKE has no default escape character, so the backslashes added by
# sanitize_like/1 only take effect when the statement carries an explicit
# ESCAPE clause. Ecto's like/2 does not emit one, hence the fragment; without
# it a query such as "50%" would look for a literal backslash and still treat
# `%` as a wildcard.
defp search_like(query) do
  pattern = "%#{sanitize_like(query)}%"

  Product
  |> where([p], p.visible == true and p.status == "active")
  |> where(
    [p],
    fragment("? LIKE ? ESCAPE '\\'", p.title, ^pattern) or
      fragment("? LIKE ? ESCAPE '\\'", p.category, ^pattern)
  )
  |> order_by([p], p.title)
  |> limit(20)
  |> preload(^@listing_preloads)
  |> Repo.all()
end
# Escapes the LIKE metacharacters in user input by prefixing each backslash,
# `%` and `_` with a backslash, so they can be matched literally when
# backslash is the LIKE escape character.
defp sanitize_like(input) do
  String.replace(input, ["\\", "%", "_"], fn special -> "\\" <> special end)
end
# Writes one product into the index: first a row in products_search_map
# (product_id -> rowid), then the searchable text in products_search under
# that same rowid.
#
# NOTE(review): the rowid is read back with last_insert_rowid(), which must
# run on the same connection as the INSERT. Both callers in this module wrap
# the call in Repo.transaction — confirm any new caller does too.
defp insert_into_index(%Product{} = product) do
  Repo.query!("INSERT INTO products_search_map (product_id) VALUES (?1)", [product.id])

  %{rows: [[map_rowid]]} = Repo.query!("SELECT last_insert_rowid()")

  # Missing fields are indexed as empty strings.
  fts_params = [
    map_rowid,
    product.title || "",
    product.category || "",
    build_variant_info(product.variants || []),
    strip_html(product.description || "")
  ]

  Repo.query!(
    """
    INSERT INTO products_search (rowid, title, category, variant_info, description)
    VALUES (?1, ?2, ?3, ?4, ?5)
    """,
    fts_params
  )
end
# Deletes a product's rows from both index tables. The rowid is looked up in
# products_search_map; when the lookup does not yield exactly one row nothing
# is deleted and :ok is returned.
defp remove_from_index(product_id) do
  lookup =
    Repo.query!("SELECT rowid FROM products_search_map WHERE product_id = ?1", [product_id])

  with %{rows: [[rowid]]} <- lookup do
    Repo.query!("DELETE FROM products_search WHERE rowid = ?1", [rowid])
    Repo.query!("DELETE FROM products_search_map WHERE rowid = ?1", [rowid])
  else
    _ -> :ok
  end
end
# Collects the searchable text for a product's variants: the title and all
# option values of every enabled variant, de-duplicated (first occurrence
# wins) and joined with single spaces. A variant with nil options contributes
# only its title.
defp build_variant_info(variants) do
  texts =
    for variant <- variants,
        variant.is_enabled,
        text <- [variant.title | Map.values(variant.options || %{})],
        uniq: true,
        do: text

  Enum.join(texts, " ")
end
# Converts an HTML fragment to plain text for indexing: tags are replaced by
# spaces, a handful of common entities are decoded, runs of whitespace are
# collapsed to a single space and the result is trimmed.
#
# Entities are decoded in a single pass. Decoding `&amp;` in a separate step
# before the others turned an escaped entity such as `&amp;lt;` into `<`
# instead of the literal text `&lt;`.
defp strip_html(html) do
  html
  |> String.replace(~r/<[^>]+>/, " ")
  |> String.replace(
    ["&amp;", "&lt;", "&gt;", "&quot;", "&#39;", "&nbsp;"],
    fn
      "&amp;" -> "&"
      "&lt;" -> "<"
      "&gt;" -> ">"
      "&quot;" -> "\""
      "&#39;" -> "'"
      "&nbsp;" -> " "
    end
  )
  |> String.replace(~r/\s+/, " ")
  |> String.trim()
end
end