berrypod/lib/berrypod/redirects.ex
jamey b235219aee
All checks were successful
deploy / deploy (push) Successful in 3m42s
add dead link monitoring for outgoing content links
Scans page blocks and nav items for broken URLs (internal via DB
lookup, external via HTTP HEAD). Daily Oban cron at 03:30, plus
on-demand checks when pages are saved. Admin UI tab on redirects
page with re-check, ignore, and clickable source links.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-01 13:00:59 +00:00

510 lines
13 KiB
Elixir

defmodule Berrypod.Redirects do
@moduledoc """
Manages URL redirects and broken URL tracking.
Redirects are cached in ETS for fast lookup on every request.
The cache is warmed on application start and invalidated on
any redirect create/update/delete.
"""
import Ecto.Query
alias Berrypod.Repo
alias Berrypod.Redirects.{Redirect, BrokenUrl, DeadLink}
@table :redirects_cache
@pubsub_topic "redirects"
@doc """
Subscribes the calling process to the redirects PubSub topic.
"""
def subscribe, do: Phoenix.PubSub.subscribe(Berrypod.PubSub, @pubsub_topic)
# Publishes `message` on the redirects PubSub topic.
defp broadcast(message),
  do: Phoenix.PubSub.broadcast(Berrypod.PubSub, @pubsub_topic, message)
# ── ETS cache ──
@doc """
Ensures the ETS table exists, then loads every redirect into it.

Returns `:ok` (the result of `warm_cache/0`).
"""
def start_cache do
  _table = create_table()
  warm_cache()
end
@doc """
Creates the named ETS table unless it already exists.

Always returns the table name.
"""
def create_table do
  case :ets.whereis(@table) do
    :undefined ->
      :ets.new(@table, [:set, :public, :named_table, read_concurrency: true])

    _existing ->
      nil
  end

  @table
end
@doc """
Loads every redirect from the database into the ETS table.

Each row is stored as `{from_path, {to_path, status_code, id}}`.
Existing entries are overwritten; entries with no matching row are
left in place. Returns `:ok`.
"""
def warm_cache do
  rows_query = from r in Redirect, select: {r.from_path, {r.to_path, r.status_code, r.id}}

  rows_query
  |> Repo.all()
  |> Enum.each(fn {from_path, value} -> :ets.insert(@table, {from_path, value}) end)

  :ok
end
# Drops the cached entry (if any) stored under `from_path`.
defp invalidate_cache(from_path), do: :ets.delete(@table, from_path)
# Stores (or overwrites) the cache entry for `from_path`.
defp put_cache(from_path, to_path, status_code, id) do
  entry = {from_path, {to_path, status_code, id}}
  :ets.insert(@table, entry)
end
# ── Lookup ──
@doc """
Finds the redirect registered for `path`.

The ETS cache is consulted first. On a miss the database is queried and,
when a row exists, the cache is populated before returning.

Returns `{:ok, %{to_path: _, status_code: _, id: _}}` or `:not_found`.
"""
def lookup(path) do
  case :ets.lookup(@table, path) do
    [{^path, {to_path, status_code, id}}] ->
      {:ok, %{to_path: to_path, status_code: status_code, id: id}}

    [] ->
      lookup_in_db(path)
  end
end

# Cache miss: read the redirect from the database and cache it when present.
defp lookup_in_db(path) do
  query = from r in Redirect, where: r.from_path == ^path

  case Repo.one(query) do
    nil ->
      :not_found

    found ->
      put_cache(found.from_path, found.to_path, found.status_code, found.id)
      {:ok, %{to_path: found.to_path, status_code: found.status_code, id: found.id}}
  end
end
# ── Create ──
@doc """
Creates an automatic redirect (from sync events).

Flattens redirect chains: if the new redirect's `to_path` is itself
a `from_path` in an existing redirect, follows it to the final destination.
Also updates any existing redirects that point to the new `from_path`
to point directly to the final destination instead.

Uses `on_conflict: :nothing` so repeated sync calls are safe. When a
redirect for `from_path` already exists nothing is written; in that case
the already-persisted row is what gets cached and returned, so the cache
never holds a destination that is not in the database.

`attrs` may use atom keys or string keys (not a mix of both).

Returns `{:ok, redirect}` or `{:error, changeset}`.
"""
def create_auto(attrs) do
  changeset = flattened_changeset(attrs)

  case Repo.insert(changeset, on_conflict: :nothing, conflict_target: :from_path) do
    {:ok, inserted} ->
      # With `on_conflict: :nothing` Ecto returns `{:ok, struct}` even when the
      # row was skipped, so re-read to cache what is really stored.
      redirect = reload_by_from_path(inserted)
      put_cache(redirect.from_path, redirect.to_path, redirect.status_code, redirect.id)
      broadcast({:redirects_changed, :created})
      {:ok, redirect}

    error ->
      error
  end
end

@doc """
Creates a manual redirect (from admin UI).

Sets `source` to `"admin"`, flattens redirect chains the same way as
`create_auto/1`, and fails with `{:error, changeset}` if the insert is
rejected.

`attrs` may use atom keys or string keys (not a mix of both).
"""
def create_manual(attrs) do
  changeset =
    attrs
    |> put_attr(:source, "admin")
    |> flattened_changeset()

  case Repo.insert(changeset) do
    {:ok, redirect} ->
      put_cache(redirect.from_path, redirect.to_path, redirect.status_code, redirect.id)
      broadcast({:redirects_changed, :created})
      {:ok, redirect}

    error ->
      error
  end
end

# Resolves the destination to the end of its chain, repoints existing
# redirects that target our from_path, and builds the insert changeset.
defp flattened_changeset(attrs) do
  to_path = resolve_chain(attrs[:to_path] || attrs["to_path"])
  attrs = put_attr(attrs, :to_path, to_path)
  from_path = attrs[:from_path] || attrs["from_path"]

  # Flatten any existing redirects that point to our from_path
  flatten_incoming(from_path, to_path)

  Redirect.changeset(%Redirect{}, attrs)
end

# Writes `value` under the key style the caller already uses. Mixing atom
# and string keys makes Ecto.Changeset.cast/4 raise (assumes
# Redirect.changeset/2 casts its params — TODO confirm).
defp put_attr(attrs, key, value) do
  if Enum.any?(Map.keys(attrs), &is_binary/1) do
    Map.put(attrs, Atom.to_string(key), value)
  else
    Map.put(attrs, key, value)
  end
end

# Returns the stored redirect for the struct's from_path, falling back to
# the struct itself when no row is found.
defp reload_by_from_path(%Redirect{from_path: from_path} = inserted)
     when is_binary(from_path) do
  Repo.one(from r in Redirect, where: r.from_path == ^from_path) || inserted
end

defp reload_by_from_path(inserted), do: inserted
# Follow redirect chains to find the final destination.
#
# `seen` tracks the paths already visited so a circular chain stops at the
# first repeated path instead of recursing forever.
#
# Non-binary paths (e.g. `nil` when the caller's attrs lack a to_path) are
# returned untouched: Ecto raises when a pinned `nil` is compared with `==`,
# and leaving the value alone lets changeset validation report the problem.
defp resolve_chain(path, seen \\ MapSet.new())

defp resolve_chain(path, seen) when is_binary(path) do
  if MapSet.member?(seen, path) do
    # Circular — stop here
    path
  else
    case Repo.one(from r in Redirect, where: r.from_path == ^path, select: r.to_path) do
      nil -> path
      next -> resolve_chain(next, MapSet.put(seen, path))
    end
  end
end

defp resolve_chain(path, _seen), do: path
# Update any redirects whose to_path matches old_to to point to new_to instead,
# then refresh the cache entry of every redirect that now targets new_to.
#
# When either path is not a binary (e.g. `nil` from incomplete attrs) this is
# a no-op: Ecto raises on `==` against a pinned `nil`, and skipping lets the
# caller's changeset validation surface the missing field. Returns `:ok`.
defp flatten_incoming(old_to, new_to) when is_binary(old_to) and is_binary(new_to) do
  from(r in Redirect, where: r.to_path == ^old_to)
  |> Repo.update_all(set: [to_path: new_to])

  # Refresh cache for any updated redirects
  from(r in Redirect, where: r.to_path == ^new_to)
  |> Repo.all()
  |> Enum.each(fn r -> put_cache(r.from_path, r.to_path, r.status_code, r.id) end)
end

defp flatten_incoming(_old_to, _new_to), do: :ok
# ── Update / Delete ──
@doc """
Applies `attrs` to an existing redirect and keeps the cache in step.

If the `from_path` changed, the entry under the old path is dropped before
the new entry is written. Returns `{:ok, redirect}` or the error from
`Repo.update/1` unchanged.
"""
def update_redirect(%Redirect{from_path: previous_path} = redirect, attrs) do
  with {:ok, updated} <- redirect |> Redirect.changeset(attrs) |> Repo.update() do
    # Old from_path may have changed
    if previous_path != updated.from_path, do: invalidate_cache(previous_path)

    put_cache(updated.from_path, updated.to_path, updated.status_code, updated.id)
    {:ok, updated}
  end
end
@doc """
Removes a redirect from the database and the cache.

On success broadcasts `{:redirects_changed, :deleted}` and returns
`{:ok, redirect}`; otherwise returns the error from `Repo.delete/1`.
"""
def delete_redirect(%Redirect{} = redirect) do
  with {:ok, deleted} <- Repo.delete(redirect) do
    invalidate_cache(deleted.from_path)
    broadcast({:redirects_changed, :deleted})
    {:ok, deleted}
  end
end
@doc """
Adds one to the `hit_count` of the redirect with the given `id`.

Accepts any map or struct carrying an `:id` key.
"""
def increment_hit_count(%{id: id}) do
  target = from r in Redirect, where: r.id == ^id
  Repo.update_all(target, inc: [hit_count: 1])
end
# ── Listing ──
@doc """
Returns every redirect, newest (`inserted_at`) first.
"""
def list_redirects do
  Repo.all(from r in Redirect, order_by: [desc: r.inserted_at])
end
@doc """
Returns one page of redirects, newest first.

Options: `:page` and `:per_page` (defaults to 25).
"""
def list_redirects_paginated(opts \\ []) do
  per_page = opts[:per_page] || 25
  newest_first = from r in Redirect, order_by: [desc: r.inserted_at]

  Berrypod.Pagination.paginate(newest_first, page: opts[:page], per_page: per_page)
end
@doc """
Fetches a redirect by ID via `Repo.get!/2`.
"""
def get_redirect!(id) do
  Repo.get!(Redirect, id)
end
# ── Broken URLs ──
@doc """
Records a 404 for `path`.

  * No existing record — inserts one with `prior_hits` and the current time
    as both first and last seen.
  * Existing record with status `"ignored"` or `"resolved"` — does nothing
    and returns `{:ok, :skipped}`.
  * Any other existing record — bumps `recent_404_count` and `last_seen_at`.

Broadcasts `{:broken_urls_changed, path}` when a record was written.
Returns the insert/update result (or `{:ok, :skipped}`).
"""
def record_broken_url(path, prior_hits) do
  seen_at = DateTime.truncate(DateTime.utc_now(), :second)
  current = Repo.one(from b in BrokenUrl, where: b.path == ^path)
  outcome = write_broken_url(current, path, prior_hits, seen_at)

  # Only a written record is announced; skips and errors stay silent.
  if match?({:ok, %BrokenUrl{}}, outcome) do
    broadcast({:broken_urls_changed, path})
  end

  outcome
end

# First sighting: create the record.
defp write_broken_url(nil, path, prior_hits, seen_at) do
  %BrokenUrl{}
  |> BrokenUrl.changeset(%{
    path: path,
    prior_analytics_hits: prior_hits,
    first_seen_at: seen_at,
    last_seen_at: seen_at
  })
  |> Repo.insert()
end

# Already handled by an admin: leave untouched.
defp write_broken_url(%{status: status}, _path, _prior_hits, _seen_at)
     when status in ["ignored", "resolved"] do
  {:ok, :skipped}
end

# Known record: count another hit.
defp write_broken_url(existing, _path, _prior_hits, seen_at) do
  existing
  |> BrokenUrl.changeset(%{
    recent_404_count: existing.recent_404_count + 1,
    last_seen_at: seen_at
  })
  |> Repo.update()
end
@doc """
Returns broken URLs with the given `status` (default `"pending"`).

Ordered by `prior_analytics_hits`, then `recent_404_count`, both descending.
"""
def list_broken_urls(status \\ "pending") do
  by_impact =
    from b in BrokenUrl,
      where: b.status == ^status,
      order_by: [desc: b.prior_analytics_hits, desc: b.recent_404_count]

  Repo.all(by_impact)
end
@doc """
Returns one page of broken URLs, ordered like `list_broken_urls/1`.

Options: `:status` (default `"pending"`), `:page`, `:per_page` (default 25).
"""
def list_broken_urls_paginated(opts \\ []) do
  status = opts[:status] || "pending"
  per_page = opts[:per_page] || 25

  by_impact =
    from b in BrokenUrl,
      where: b.status == ^status,
      order_by: [desc: b.prior_analytics_hits, desc: b.recent_404_count]

  Berrypod.Pagination.paginate(by_impact, page: opts[:page], per_page: per_page)
end
@doc """
Resolves a broken URL by creating a redirect and updating the record.

Creates a manual redirect from the broken path to `to_path`, then marks the
record `"resolved"` and links it to the new redirect. On success broadcasts
`{:broken_urls_changed, path}` — the same notification `ignore_broken_url/1`
and `mark_broken_url_resolved/1` send.

Returns `{:ok, broken_url}`, or the error from whichever step failed.
"""
def resolve_broken_url(%BrokenUrl{} = broken_url, to_path) do
  with {:ok, redirect} <- create_manual(%{from_path: broken_url.path, to_path: to_path}),
       {:ok, _resolved} = result <-
         broken_url
         |> BrokenUrl.changeset(%{status: "resolved", resolved_redirect_id: redirect.id})
         |> Repo.update() do
    broadcast({:broken_urls_changed, broken_url.path})
    result
  end
end
@doc """
Sets a broken URL's status to `"ignored"`.

Broadcasts `{:broken_urls_changed, path}` on success and returns the
result of the update.
"""
def ignore_broken_url(%BrokenUrl{path: path} = broken_url) do
  outcome = Repo.update(BrokenUrl.changeset(broken_url, %{status: "ignored"}))

  if match?({:ok, _}, outcome), do: broadcast({:broken_urls_changed, path})

  outcome
end
@doc """
Sets a broken URL's status to `"resolved"` (e.g. once a redirect covers it).

Broadcasts `{:broken_urls_changed, path}` on success and returns the
result of the update.
"""
def mark_broken_url_resolved(%BrokenUrl{path: path} = broken_url) do
  outcome = Repo.update(BrokenUrl.changeset(broken_url, %{status: "resolved"}))

  if match?({:ok, _}, outcome), do: broadcast({:broken_urls_changed, path})

  outcome
end
@doc """
Fetches a broken URL by ID via `Repo.get!/2`.
"""
def get_broken_url!(id) do
  Repo.get!(BrokenUrl, id)
end
@doc """
Returns the broken URL for `path` whose status is `"pending"`, or `nil`.
"""
def get_broken_url_by_path(path) do
  pending_for_path = from b in BrokenUrl, where: b.path == ^path and b.status == "pending"
  Repo.one(pending_for_path)
end
# ── Auto-resolve ──
@doc """
Tries to repair a broken `/products/<slug>` URL through product search.

The slug's dashes are turned into spaces and the result is passed to
`Berrypod.Search.search/1`. When the search yields exactly one product
whose slug differs from the old one, an automatic redirect is created and
any pending broken-URL record for the path is marked resolved.

Returns `:resolved` on success, `:no_match` otherwise (including every
path that is not under `/products/`).
"""
def attempt_auto_resolve("/products/" <> old_slug = path) do
  terms = String.trim(String.replace(old_slug, "-", " "))

  with [%{slug: new_slug}] when new_slug != old_slug <- Berrypod.Search.search(terms),
       {:ok, _redirect} <-
         create_auto(%{
           from_path: path,
           to_path: "/products/#{new_slug}",
           source: "analytics_auto_resolved"
         }) do
    case get_broken_url_by_path(path) do
      nil -> nil
      broken -> mark_broken_url_resolved(broken)
    end

    :resolved
  else
    _ -> :no_match
  end
end

def attempt_auto_resolve(_path), do: :no_match
# ── Pruning ──
@doc """
Prunes auto-created redirects with zero hits older than the given number of days.

Only redirects whose `source` is `"auto_slug_change"` or
`"auto_product_deleted"` are considered. When anything was deleted the ETS
cache is emptied and reloaded so pruned redirects stop being served.

Returns `{:ok, count}` with the number of deleted rows.
"""
def prune_stale_redirects(max_age_days \\ 90) do
  {count, _} =
    from(r in Redirect,
      where: r.source in ["auto_slug_change", "auto_product_deleted"] and r.hit_count == 0,
      where: r.inserted_at < ago(^max_age_days, "day")
    )
    |> Repo.delete_all()

  # Rebuild cache if anything was pruned. `warm_cache/0` only inserts rows,
  # so the table is cleared first; otherwise the deleted redirects would
  # linger in ETS. Lookups during the gap fall back to the database.
  if count > 0 do
    :ets.delete_all_objects(@table)
    warm_cache()
  end

  {:ok, count}
end
# ── Dead links ──
@doc """
Returns one page of dead links, most recently checked first.

Options: `:status` (default `"broken"`), `:page`, `:per_page` (default 25).
"""
def list_dead_links_paginated(opts \\ []) do
  status = opts[:status] || "broken"
  per_page = opts[:per_page] || 25

  recently_checked =
    from d in DeadLink,
      where: d.status == ^status,
      order_by: [desc: d.last_checked_at]

  Berrypod.Pagination.paginate(recently_checked, page: opts[:page], per_page: per_page)
end
@doc """
Returns how many dead links currently have status `"broken"`.
"""
def count_dead_links do
  broken = from d in DeadLink, where: d.status == "broken"
  Repo.aggregate(broken, :count)
end
@doc """
Fetches a dead link by ID via `Repo.get!/2`.
"""
def get_dead_link!(id) do
  Repo.get!(DeadLink, id)
end
@doc """
Returns the dead link recorded for `url`, or `nil` when there is none.
"""
def get_dead_link_by_url(url) do
  matching = from d in DeadLink, where: d.url == ^url
  Repo.one(matching)
end
@doc """
Creates or updates a dead link record for a broken URL.

`attrs` may use atom keys or string keys (not a mix of both) and must
include the URL. `last_checked_at` is always set to the current time.
When a record for the URL already exists and is `"ignored"`, any status in
`attrs` is dropped so the ignore decision is kept.

Returns the result of the insert or update.
"""
def upsert_dead_link(attrs) do
  now = DateTime.utc_now() |> DateTime.truncate(:second)

  # Keep the caller's key style: a map mixing atom and string keys makes
  # Ecto.Changeset.cast/4 raise (assumes DeadLink.changeset/2 casts — TODO confirm).
  checked_key =
    if Enum.any?(Map.keys(attrs), &is_binary/1),
      do: "last_checked_at",
      else: :last_checked_at

  attrs = Map.put(attrs, checked_key, now)

  case get_dead_link_by_url(attrs[:url] || attrs["url"]) do
    nil ->
      %DeadLink{}
      |> DeadLink.changeset(attrs)
      |> Repo.insert()

    %{status: "ignored"} = existing ->
      # Don't overwrite an "ignored" status, whichever key style carries it
      existing
      |> DeadLink.changeset(Map.drop(attrs, [:status, "status"]))
      |> Repo.update()

    existing ->
      existing
      |> DeadLink.changeset(attrs)
      |> Repo.update()
  end
end
@doc """
Drops the dead link record for `url` now that the URL is healthy.

Returns `:ok` when there is no record or the record is `"ignored"` (ignored
records are kept); otherwise returns the result of `Repo.delete/1`.
"""
def clear_healthy_link(url) do
  url
  |> get_dead_link_by_url()
  |> delete_unless_ignored()
end

# Nothing recorded, or deliberately ignored: keep things as they are.
defp delete_unless_ignored(nil), do: :ok
defp delete_unless_ignored(%{status: "ignored"}), do: :ok
defp delete_unless_ignored(dead_link), do: Repo.delete(dead_link)
@doc """
Sets a dead link's status to `"ignored"` and returns the update result.
"""
def ignore_dead_link(%DeadLink{} = dead_link) do
  changeset = DeadLink.changeset(dead_link, %{status: "ignored"})
  Repo.update(changeset)
end
@doc """
Deletes the given dead link record and returns the result of `Repo.delete/1`.
"""
def delete_dead_link(%DeadLink{} = dead_link), do: Repo.delete(dead_link)
@doc """
Deletes every dead link whose URL is not in `current_urls`.

All dead link records are loaded and the orphans are deleted one by one.
Returns `:ok`.
"""
def prune_orphan_dead_links(current_urls) do
  live_urls = MapSet.new(current_urls)

  DeadLink
  |> Repo.all()
  |> Enum.reject(fn dead_link -> MapSet.member?(live_urls, dead_link.url) end)
  |> Enum.each(fn orphan -> Repo.delete(orphan) end)
end
end