All checks were successful
deploy / deploy (push) Successful in 1m27s
- settings cache: create ETS table in application.ex so it survives GenServer crashes (same pattern as redirects cache)
- redirects: remove DB fallback on cache miss — cache is warmed on startup and kept in sync, so a miss means no redirect exists
- product listing: exclude provider_data (up to 72KB JSON) and description from listing queries via listing_select/1
- logo/header: select only rendering fields, skip BLOB data column

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
503 lines
13 KiB
Elixir
503 lines
13 KiB
Elixir
defmodule Berrypod.Redirects do
|
|
@moduledoc """
|
|
Manages URL redirects and broken URL tracking.
|
|
|
|
Redirects are cached in ETS for fast lookup on every request.
|
|
The cache is warmed on application start and invalidated on
|
|
any redirect create/update/delete.
|
|
"""
|
|
|
|
import Ecto.Query
|
|
alias Berrypod.Repo
|
|
alias Berrypod.Redirects.{Redirect, BrokenUrl, DeadLink}
|
|
|
|
@table :redirects_cache
|
|
@pubsub_topic "redirects"
|
|
|
|
@doc """
Subscribes the calling process to redirect-related PubSub events.
"""
def subscribe, do: Phoenix.PubSub.subscribe(Berrypod.PubSub, @pubsub_topic)
|
|
|
|
# Publishes `message` to every subscriber of the redirects topic.
defp broadcast(message),
  do: Phoenix.PubSub.broadcast(Berrypod.PubSub, @pubsub_topic, message)
|
|
|
|
# ── ETS cache ──
|
|
|
|
@doc """
Ensures the redirects ETS table exists, then fills it from the database.

Per the moduledoc, this runs on application start so every request can
hit the cache.
"""
def start_cache do
  _table = create_table()
  warm_cache()
end
|
|
|
|
@doc """
Creates the redirects ETS table if it does not already exist.

The table is `:public` with `read_concurrency: true`, so lookups can be
served from any process without a GenServer round-trip. Returns the
table name.
"""
def create_table do
  case :ets.whereis(@table) do
    :undefined -> :ets.new(@table, [:set, :public, :named_table, read_concurrency: true])
    _existing_ref -> :ok
  end

  @table
end
|
|
|
|
@doc """
Loads every redirect from the database into the ETS cache.

Existing cache entries are overwritten, but entries whose rows no
longer exist are NOT removed — callers that delete rows must evict
them explicitly. Returns `:ok`.
"""
def warm_cache do
  Redirect
  |> select([r], {r.from_path, {r.to_path, r.status_code, r.id}})
  |> Repo.all()
  |> Enum.each(fn {from_path, value} -> :ets.insert(@table, {from_path, value}) end)

  :ok
end
|
|
|
|
# Evicts a single cached redirect by its from_path key.
defp invalidate_cache(from_path), do: :ets.delete(@table, from_path)
|
|
|
|
# Writes (or overwrites) one cache entry: from_path => {to_path, status_code, id}.
defp put_cache(from_path, to_path, status_code, id),
  do: :ets.insert(@table, {from_path, {to_path, status_code, id}})
|
|
|
|
# ── Lookup ──
|
|
|
|
@doc """
Looks up a redirect by path in the ETS cache.

Returns `{:ok, %{to_path: ..., status_code: ..., id: ...}}` when a
redirect exists, `:not_found` otherwise. There is deliberately no DB
fallback: the cache is warmed with all redirects on startup and kept in
sync on create/update/delete, so a miss means no redirect exists.
"""
def lookup(path) do
  case :ets.lookup(@table, path) do
    [{^path, {to_path, status_code, id}}] ->
      {:ok, %{to_path: to_path, status_code: status_code, id: id}}

    [] ->
      :not_found
  end
end
|
|
|
|
# ── Create ──
|
|
|
|
@doc """
Creates an automatic redirect (from sync events).

Flattens redirect chains: if the new redirect's `to_path` is itself
a `from_path` in an existing redirect, follows it to the final
destination. Also updates any existing redirects that point to the new
`from_path` to point directly to the final destination instead.

Uses `on_conflict: :nothing` so repeated sync calls are safe. When the
insert hits an existing row, the cache is left untouched and no event
is broadcast.
"""
def create_auto(attrs) do
  to_path = resolve_chain(attrs[:to_path] || attrs["to_path"])
  attrs = Map.put(attrs, :to_path, to_path)
  from_path = attrs[:from_path] || attrs["from_path"]

  # Flatten any existing redirects that point to our from_path
  flatten_incoming(from_path, to_path)

  changeset = Redirect.changeset(%Redirect{}, attrs)

  case Repo.insert(changeset, on_conflict: :nothing, conflict_target: :from_path) do
    {:ok, %Redirect{id: nil} = redirect} ->
      # Fix: `on_conflict: :nothing` hit an existing row, so no id was
      # returned and nothing was written. Previously this still called
      # put_cache/4, clobbering the valid cache entry with a nil id.
      {:ok, redirect}

    {:ok, redirect} ->
      put_cache(redirect.from_path, redirect.to_path, redirect.status_code, redirect.id)
      broadcast({:redirects_changed, :created})
      {:ok, redirect}

    error ->
      error
  end
end
|
|
|
|
@doc """
Creates a manual redirect (from admin UI).

Resolves chains and flattens incoming redirects the same way as
`create_auto/1`, but tags the record with source `"admin"` and does not
use upsert semantics. On success the cache is updated and a
`{:redirects_changed, :created}` event is broadcast.
"""
def create_manual(attrs) do
  attrs = Map.put(attrs, :source, "admin")
  final_to = resolve_chain(attrs[:to_path] || attrs["to_path"])
  attrs = Map.put(attrs, :to_path, final_to)

  flatten_incoming(attrs[:from_path] || attrs["from_path"], final_to)

  insert_result = %Redirect{} |> Redirect.changeset(attrs) |> Repo.insert()

  with {:ok, redirect} <- insert_result do
    put_cache(redirect.from_path, redirect.to_path, redirect.status_code, redirect.id)
    broadcast({:redirects_changed, :created})
    {:ok, redirect}
  end
end
|
|
|
|
# Walks redirect chains until reaching a path that is not itself
# redirected. Visited paths are tracked so a circular chain terminates
# at the first repeat (no DB query is issued for a repeated path).
defp resolve_chain(path, seen \\ MapSet.new()) do
  next_hop =
    if MapSet.member?(seen, path),
      do: nil,
      else: Repo.one(from r in Redirect, where: r.from_path == ^path, select: r.to_path)

  case next_hop do
    nil -> path
    next -> resolve_chain(next, MapSet.put(seen, path))
  end
end
|
|
|
|
# Rewrites every redirect currently targeting `old_to` so it points
# straight at `new_to`, then refreshes the cache entries for all
# redirects that now target `new_to`.
defp flatten_incoming(old_to, new_to) do
  Redirect
  |> where([r], r.to_path == ^old_to)
  |> Repo.update_all(set: [to_path: new_to])

  Redirect
  |> where([r], r.to_path == ^new_to)
  |> Repo.all()
  |> Enum.each(&put_cache(&1.from_path, &1.to_path, &1.status_code, &1.id))
end
|
|
|
|
# ── Update / Delete ──
|
|
|
|
@doc """
Updates an existing redirect.

On success the ETS cache is refreshed (evicting the old `from_path`
entry when it changed) and a `{:redirects_changed, :updated}` event is
broadcast, matching the create and delete paths.
"""
def update_redirect(%Redirect{} = redirect, attrs) do
  changeset = Redirect.changeset(redirect, attrs)

  case Repo.update(changeset) do
    {:ok, updated} ->
      # Old from_path may have changed — drop the stale cache key.
      if redirect.from_path != updated.from_path do
        invalidate_cache(redirect.from_path)
      end

      put_cache(updated.from_path, updated.to_path, updated.status_code, updated.id)

      # Fix: create_manual/create_auto and delete_redirect broadcast on
      # change, but update previously did not, leaving subscribers stale.
      broadcast({:redirects_changed, :updated})
      {:ok, updated}

    error ->
      error
  end
end
|
|
|
|
@doc """
Deletes a redirect.

On success, evicts the entry from the ETS cache and broadcasts a
`{:redirects_changed, :deleted}` event.
"""
def delete_redirect(%Redirect{} = redirect) do
  with {:ok, deleted} <- Repo.delete(redirect) do
    invalidate_cache(deleted.from_path)
    broadcast({:redirects_changed, :deleted})
    {:ok, deleted}
  end
end
|
|
|
|
@doc """
Increments the hit count for a redirect.

Accepts any map or struct carrying an `:id` key (e.g. the map returned
by `lookup/1`).
"""
def increment_hit_count(%{id: id}) do
  Redirect
  |> where([r], r.id == ^id)
  |> Repo.update_all(inc: [hit_count: 1])
end
|
|
|
|
# ── Listing ──
|
|
|
|
@doc """
Lists all redirects, ordered by most recent first.
"""
def list_redirects do
  Redirect
  |> order_by([r], desc: r.inserted_at)
  |> Repo.all()
end
|
|
|
|
@doc """
Lists redirects newest-first, paginated.

Options: `:page` and `:per_page` (defaults to 25).
"""
def list_redirects_paginated(opts \\ []) do
  per_page = opts[:per_page] || 25

  Redirect
  |> order_by([r], desc: r.inserted_at)
  |> Berrypod.Pagination.paginate(page: opts[:page], per_page: per_page)
end
|
|
|
|
@doc """
Gets a single redirect by ID. Raises if not found.
"""
def get_redirect!(id) do
  Repo.get!(Redirect, id)
end
|
|
|
|
# ── Broken URLs ──
|
|
|
|
@doc """
Records or updates a broken URL entry.

First sighting of a path inserts a new row; later sightings increment
the 404 counter and bump `last_seen_at`. Entries already marked
"ignored" or "resolved" are left untouched and `{:ok, :skipped}` is
returned. Broadcasts `{:broken_urls_changed, path}` whenever a row was
actually written.

NOTE(review): the read-then-write is not atomic; concurrent calls for
the same path could race.
"""
def record_broken_url(path, prior_hits) do
  now = DateTime.utc_now() |> DateTime.truncate(:second)

  result =
    case Repo.one(from b in BrokenUrl, where: b.path == ^path) do
      nil ->
        new_attrs = %{
          path: path,
          prior_analytics_hits: prior_hits,
          first_seen_at: now,
          last_seen_at: now
        }

        %BrokenUrl{} |> BrokenUrl.changeset(new_attrs) |> Repo.insert()

      %{status: status} when status in ["ignored", "resolved"] ->
        {:ok, :skipped}

      existing ->
        existing
        |> BrokenUrl.changeset(%{
          recent_404_count: existing.recent_404_count + 1,
          last_seen_at: now
        })
        |> Repo.update()
    end

  # Only notify when a %BrokenUrl{} was inserted/updated — skipped
  # entries and errors broadcast nothing.
  with {:ok, %BrokenUrl{}} <- result do
    broadcast({:broken_urls_changed, path})
  end

  result
end
|
|
|
|
@doc """
Lists broken URLs with the given status (default `"pending"`), sorted
by prior analytics hits (highest impact first), then recent 404 count.
"""
def list_broken_urls(status \\ "pending") do
  BrokenUrl
  |> where([b], b.status == ^status)
  |> order_by([b], desc: b.prior_analytics_hits, desc: b.recent_404_count)
  |> Repo.all()
end
|
|
|
|
@doc """
Paginated variant of `list_broken_urls/1`.

Options: `:status` (default `"pending"`), `:page`, `:per_page`
(default 25).
"""
def list_broken_urls_paginated(opts \\ []) do
  status = opts[:status] || "pending"

  BrokenUrl
  |> where([b], b.status == ^status)
  |> order_by([b], desc: b.prior_analytics_hits, desc: b.recent_404_count)
  |> Berrypod.Pagination.paginate(page: opts[:page], per_page: opts[:per_page] || 25)
end
|
|
|
|
@doc """
Resolves a broken URL by creating a redirect and updating the record.

On success broadcasts `{:broken_urls_changed, path}` so subscribers are
notified, matching `ignore_broken_url/1` and
`mark_broken_url_resolved/1`. Errors from either the redirect creation
or the status update are returned as-is.
"""
def resolve_broken_url(%BrokenUrl{} = broken_url, to_path) do
  with {:ok, redirect} <- create_manual(%{from_path: broken_url.path, to_path: to_path}),
       {:ok, updated} <-
         broken_url
         |> BrokenUrl.changeset(%{status: "resolved", resolved_redirect_id: redirect.id})
         |> Repo.update() do
    # Fix: this path previously skipped the broadcast that the other
    # status-changing functions emit, leaving subscribers stale.
    broadcast({:broken_urls_changed, broken_url.path})
    {:ok, updated}
  end
end
|
|
|
|
@doc """
Marks a broken URL as ignored.

Broadcasts `{:broken_urls_changed, path}` on success; returns the
`Repo.update/1` result either way.
"""
def ignore_broken_url(%BrokenUrl{} = broken_url) do
  broken_url
  |> BrokenUrl.changeset(%{status: "ignored"})
  |> Repo.update()
  |> tap(fn
    {:ok, _} -> broadcast({:broken_urls_changed, broken_url.path})
    _error -> :ok
  end)
end
|
|
|
|
@doc """
Marks a broken URL as resolved (e.g. after creating a redirect for it).

Broadcasts `{:broken_urls_changed, path}` on success; returns the
`Repo.update/1` result either way.
"""
def mark_broken_url_resolved(%BrokenUrl{} = broken_url) do
  broken_url
  |> BrokenUrl.changeset(%{status: "resolved"})
  |> Repo.update()
  |> tap(fn
    {:ok, _} -> broadcast({:broken_urls_changed, broken_url.path})
    _error -> :ok
  end)
end
|
|
|
|
@doc """
Gets a broken URL by ID. Raises if not found.
"""
def get_broken_url!(id) do
  Repo.get!(BrokenUrl, id)
end
|
|
|
|
@doc """
Gets a pending broken URL by path, or nil.
"""
def get_broken_url_by_path(path) do
  BrokenUrl
  |> where([b], b.path == ^path and b.status == "pending")
  |> Repo.one()
end
|
|
|
|
# ── Auto-resolve ──
|
|
|
|
@doc """
Attempts to auto-resolve a broken product URL using FTS5 search.

Converts the slug back to words and searches the product index. If
exactly one result is found (with a different slug), creates a redirect,
marks any pending broken-URL record as resolved, and returns
`:resolved`. Zero or multiple results — or a failed redirect insert —
return `:no_match` and leave the entry for admin review. Non-product
paths always return `:no_match`.
"""
def attempt_auto_resolve("/products/" <> old_slug = path) do
  search_terms = old_slug |> String.replace("-", " ") |> String.trim()

  with [%{slug: new_slug}] when new_slug != old_slug <- Berrypod.Search.search(search_terms),
       {:ok, _redirect} <-
         create_auto(%{
           from_path: path,
           to_path: "/products/#{new_slug}",
           source: "analytics_auto_resolved"
         }) do
    if broken = get_broken_url_by_path(path), do: mark_broken_url_resolved(broken)
    :resolved
  else
    _ -> :no_match
  end
end

def attempt_auto_resolve(_path), do: :no_match
|
|
|
|
# ── Pruning ──
|
|
|
|
@doc """
Prunes auto-created redirects with zero hits older than the given number
of days. Returns `{:ok, count}` with the number of rows deleted.

Evicts the pruned entries from the ETS cache individually. Fix:
`warm_cache/0` only inserts rows that still exist and never removes
stale keys, so the previous "rebuild cache" approach left deleted
redirects serving from cache indefinitely.
"""
def prune_stale_redirects(max_age_days \\ 90) do
  stale =
    from(r in Redirect,
      where: r.source in ["auto_slug_change", "auto_product_deleted"] and r.hit_count == 0,
      where: r.inserted_at < ago(^max_age_days, "day")
    )

  # Capture the cache keys before the rows disappear.
  from_paths = stale |> select([r], r.from_path) |> Repo.all()

  {count, _} = Repo.delete_all(stale)

  Enum.each(from_paths, &invalidate_cache/1)

  {:ok, count}
end
|
|
|
|
# ── Dead links ──
|
|
|
|
@doc """
Lists dead links, paginated, ordered by most recently checked first.

Options: `:status` (default `"broken"`), `:page`, `:per_page`
(default 25).
"""
def list_dead_links_paginated(opts \\ []) do
  status = opts[:status] || "broken"

  DeadLink
  |> where([d], d.status == ^status)
  |> order_by([d], desc: d.last_checked_at)
  |> Berrypod.Pagination.paginate(page: opts[:page], per_page: opts[:per_page] || 25)
end
|
|
|
|
@doc """
Counts dead links with status "broken".
"""
def count_dead_links do
  DeadLink
  |> where([d], d.status == "broken")
  |> Repo.aggregate(:count)
end
|
|
|
|
@doc """
Gets a dead link by ID. Raises if not found.
"""
def get_dead_link!(id) do
  Repo.get!(DeadLink, id)
end
|
|
|
|
@doc """
Gets a dead link by URL, or nil.
"""
def get_dead_link_by_url(url) do
  DeadLink
  |> where([d], d.url == ^url)
  |> Repo.one()
end
|
|
|
|
@doc """
Creates or updates a dead link record for a broken URL.

Stamps `last_checked_at` with the current UTC time. An existing record
with status "ignored" keeps that status. Accepts attrs with either atom
or string keys — the `attrs[:url] || attrs["url"]` lookup shows both
styles reach this function.
"""
def upsert_dead_link(attrs) do
  now = DateTime.utc_now() |> DateTime.truncate(:second)

  # Fix: Ecto changesets reject maps that mix atom and string keys, but
  # this previously always added :last_checked_at (and deleted :status)
  # as atoms even when the caller passed string keys. Keep added and
  # removed keys in the same style as the incoming map.
  string_keys? = is_map_key(attrs, "url")
  checked_at_key = if string_keys?, do: "last_checked_at", else: :last_checked_at
  status_key = if string_keys?, do: "status", else: :status

  attrs = Map.put(attrs, checked_at_key, now)

  case get_dead_link_by_url(attrs[:url] || attrs["url"]) do
    nil ->
      %DeadLink{}
      |> DeadLink.changeset(attrs)
      |> Repo.insert()

    existing ->
      # Don't overwrite an "ignored" status
      attrs =
        if existing.status == "ignored",
          do: Map.delete(attrs, status_key),
          else: attrs

      existing
      |> DeadLink.changeset(attrs)
      |> Repo.update()
  end
end
|
|
|
|
@doc """
Removes a dead link record once its URL is healthy again.

Records marked "ignored" are kept; a missing record is a no-op (`:ok`).
"""
def clear_healthy_link(url) do
  url
  |> get_dead_link_by_url()
  |> case do
    %DeadLink{status: status} = dead_link when status != "ignored" -> Repo.delete(dead_link)
    _nil_or_ignored -> :ok
  end
end
|
|
|
|
@doc """
Marks a dead link as ignored.

Ignored records are skipped by `clear_healthy_link/1`, and
`upsert_dead_link/1` preserves their status.
"""
def ignore_dead_link(%DeadLink{} = dead_link) do
  changeset = DeadLink.changeset(dead_link, %{status: "ignored"})
  Repo.update(changeset)
end
|
|
|
|
@doc """
Deletes a dead link.
"""
def delete_dead_link(%DeadLink{} = dead_link), do: Repo.delete(dead_link)
|
|
|
|
@doc """
Removes dead link records for URLs no longer present in any content.

`current_urls` is an enumerable of URLs still referenced somewhere;
every stored dead link whose URL is absent from it gets deleted.
"""
def prune_orphan_dead_links(current_urls) do
  known = MapSet.new(current_urls)

  DeadLink
  |> Repo.all()
  |> Enum.reject(&MapSet.member?(known, &1.url))
  |> Enum.each(&Repo.delete/1)
end
|
|
end
|