add dead link monitoring for outgoing content links
All checks were successful
deploy / deploy (push) Successful in 3m42s
All checks were successful
deploy / deploy (push) Successful in 3m42s
Scans page blocks and nav items for broken URLs (internal via DB lookup, external via HTTP HEAD). Daily Oban cron at 03:30, plus on-demand checks when pages are saved. Admin UI tab on redirects page with re-check, ignore, and clickable source links. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -122,6 +122,7 @@ defmodule Berrypod.Pages do
|
||||
case result do
|
||||
{:ok, page} ->
|
||||
PageCache.invalidate(slug)
|
||||
enqueue_link_check(slug)
|
||||
{:ok, page}
|
||||
|
||||
error ->
|
||||
@@ -176,6 +177,7 @@ defmodule Berrypod.Pages do
|
||||
})
|
||||
end
|
||||
|
||||
enqueue_link_check(updated.slug)
|
||||
{:ok, updated}
|
||||
|
||||
error ->
|
||||
@@ -256,6 +258,10 @@ defmodule Berrypod.Pages do
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────
|
||||
|
||||
# Queues a background dead-link check for the page with the given slug.
# The result of `Oban.insert/1` is returned unchanged.
defp enqueue_link_check(slug) do
  %{"page_slug" => slug}
  |> Berrypod.Workers.DeadLinkCheckerWorker.new()
  |> Oban.insert()
end
|
||||
|
||||
defp page_to_map(%Page{} = page) do
|
||||
%{
|
||||
slug: page.slug,
|
||||
|
||||
@@ -9,7 +9,7 @@ defmodule Berrypod.Redirects do
|
||||
|
||||
import Ecto.Query
|
||||
alias Berrypod.Repo
|
||||
alias Berrypod.Redirects.{Redirect, BrokenUrl}
|
||||
alias Berrypod.Redirects.{Redirect, BrokenUrl, DeadLink}
|
||||
|
||||
@table :redirects_cache
|
||||
@pubsub_topic "redirects"
|
||||
@@ -407,4 +407,103 @@ defmodule Berrypod.Redirects do
|
||||
|
||||
{:ok, count}
|
||||
end
|
||||
|
||||
# ── Dead links ──
|
||||
|
||||
@doc """
Returns one page of dead links, most recently checked first.

## Options

  * `:status` - status to filter on (defaults to `"broken"`)
  * `:page` - page number, passed through to `Berrypod.Pagination.paginate/2`
  * `:per_page` - page size (defaults to `25`)
"""
def list_dead_links_paginated(opts \\ []) do
  wanted_status = opts[:status] || "broken"
  page_size = opts[:per_page] || 25

  DeadLink
  |> where([d], d.status == ^wanted_status)
  |> order_by([d], desc: d.last_checked_at)
  |> Berrypod.Pagination.paginate(page: opts[:page], per_page: page_size)
end
|
||||
|
||||
@doc """
Returns the number of dead links whose status is `"broken"`.

Records with any other status (such as `"ignored"`) are not counted.
"""
def count_dead_links do
  DeadLink
  |> where([d], d.status == "broken")
  |> Repo.aggregate(:count)
end
|
||||
|
||||
@doc """
Fetches a dead link by its ID.

Raises if no record with that ID exists (see `c:Ecto.Repo.get!/3`).
"""
def get_dead_link!(id) do
  Repo.get!(DeadLink, id)
end
|
||||
|
||||
@doc """
Looks up the dead link recorded for `url`.

Returns the `DeadLink` struct, or `nil` when the URL has no record.
"""
def get_dead_link_by_url(url) do
  DeadLink
  |> where([d], d.url == ^url)
  |> Repo.one()
end
|
||||
|
||||
@doc """
Creates or updates the dead link record for a broken URL.

`attrs` may use atom or string keys. The record's `last_checked_at` is always
set to the current UTC time (second precision).

If a record for the URL already exists and its status is `"ignored"`, the
incoming status is dropped so the link stays ignored; all other fields are
still refreshed.

Returns `{:ok, dead_link}` or `{:error, changeset}`. A missing URL yields an
error changeset rather than a failed lookup.
"""
def upsert_dead_link(attrs) do
  now = DateTime.utc_now() |> DateTime.truncate(:second)

  # Normalise to string keys before adding our own key: `Ecto.Changeset.cast/4`
  # rejects maps that mix atom and string keys, and the "ignored" guard below
  # must remove the status whichever key style the caller used.
  attrs =
    attrs
    |> Map.new(fn {key, value} -> {to_string(key), value} end)
    |> Map.put("last_checked_at", now)

  url = attrs["url"]

  # Only hit the database for a real URL; anything else falls through to the
  # insert path, where the changeset validations report the problem.
  existing = if is_binary(url), do: get_dead_link_by_url(url)

  case existing do
    nil ->
      %DeadLink{}
      |> DeadLink.changeset(attrs)
      |> Repo.insert()

    %DeadLink{status: "ignored"} ->
      # Don't overwrite an "ignored" status
      existing
      |> DeadLink.changeset(Map.delete(attrs, "status"))
      |> Repo.update()

    %DeadLink{} ->
      existing
      |> DeadLink.changeset(attrs)
      |> Repo.update()
  end
end
|
||||
|
||||
@doc """
Drops the dead link record for a URL that is healthy again.

Returns `:ok` when there is no record for the URL or when the record is
marked `"ignored"` (ignored records are kept). Otherwise returns the result
of deleting the record.
"""
def clear_healthy_link(url) do
  record = get_dead_link_by_url(url)

  cond do
    is_nil(record) -> :ok
    record.status == "ignored" -> :ok
    true -> Repo.delete(record)
  end
end
|
||||
|
||||
@doc """
Sets the status of the given dead link to `"ignored"`.

Returns `{:ok, dead_link}` or `{:error, changeset}`.
"""
def ignore_dead_link(%DeadLink{} = dead_link) do
  changeset = DeadLink.changeset(dead_link, %{status: "ignored"})
  Repo.update(changeset)
end
|
||||
|
||||
@doc """
Deletes the given dead link record, whatever its status.
"""
def delete_dead_link(%DeadLink{} = dead_link), do: Repo.delete(dead_link)
|
||||
|
||||
@doc """
Deletes every dead link record whose URL is not in `current_urls`.

`current_urls` is the list of URLs currently present in site content.
Records are removed regardless of status, so ignored links are pruned too.
Always returns `:ok`.
"""
def prune_orphan_dead_links(current_urls) do
  still_used = MapSet.new(current_urls)

  for record <- Repo.all(DeadLink), record.url not in still_used do
    Repo.delete(record)
  end

  :ok
end
|
||||
end
|
||||
|
||||
30
lib/berrypod/redirects/dead_link.ex
Normal file
30
lib/berrypod/redirects/dead_link.ex
Normal file
@@ -0,0 +1,30 @@
|
||||
defmodule Berrypod.Redirects.DeadLink do
  @moduledoc """
  Ecto schema for a URL recorded in the `dead_links` table.

  `url_type` is `"internal"` or `"external"`; `status` is `"broken"` or
  `"ignored"`. `http_status` and `error` hold the details of the failed check
  and `last_checked_at` the time it was recorded.
  """

  use Ecto.Schema

  import Ecto.Changeset

  @primary_key {:id, :binary_id, autogenerate: true}
  @foreign_key_type :binary_id

  @url_types ["internal", "external"]
  @statuses ["broken", "ignored"]

  @castable_fields [:url, :url_type, :status, :http_status, :error, :last_checked_at]
  @required_fields [:url, :url_type, :last_checked_at]

  schema "dead_links" do
    field :url, :string
    field :url_type, :string, default: "external"
    field :status, :string, default: "broken"
    field :http_status, :integer
    field :error, :string
    field :last_checked_at, :utc_datetime

    timestamps()
  end

  @doc """
  Builds a changeset for a dead link.

  Requires `url`, `url_type` and `last_checked_at`, restricts `url_type` and
  `status` to their allowed values, and maps a uniqueness violation on `url`
  to a changeset error.
  """
  def changeset(dead_link, attrs) do
    changeset = cast(dead_link, attrs, @castable_fields)

    changeset
    |> validate_required(@required_fields)
    |> validate_inclusion(:url_type, @url_types)
    |> validate_inclusion(:status, @statuses)
    |> unique_constraint(:url)
  end
end
|
||||
127
lib/berrypod/redirects/link_checker.ex
Normal file
127
lib/berrypod/redirects/link_checker.ex
Normal file
@@ -0,0 +1,127 @@
|
||||
defmodule Berrypod.Redirects.LinkChecker do
  @moduledoc """
  Validates URLs found by the link scanner.

  Internal links are checked against a fixed list of static routes and, for
  product and custom page paths, looked up through `Berrypod.Products` and
  `Berrypod.Pages`.

  External links are checked via HTTP HEAD requests, falling back to GET when
  the server does not support HEAD.
  """

  alias Berrypod.Pages
  alias Berrypod.Products

  # Static routes that are always valid
  @static_paths ~w(
    / /about /contact /delivery /privacy /terms /cart /search /coming-soon
    /collections/all
  )

  # Responses that mean "this server won't answer HEAD", not "the page is gone".
  @head_unsupported [405, 501]

  @doc """
  Checks whether an internal path is valid.

  Any query string or fragment is ignored, so `/search?q=mugs` is checked as
  `/search` and `/about#team` as `/about`.

  Returns `:ok` or `{:error, reason}`.
  """
  def check_internal(path) do
    case strip_query_and_fragment(path) do
      route when route in @static_paths ->
        :ok

      "/products/" <> slug ->
        if Products.get_visible_product(slug), do: :ok, else: {:error, "product not found"}

      "/collections/" <> _ ->
        # Collection routes are always valid (they filter, never 404)
        :ok

      route ->
        # Could be a custom page — check the slug (strip leading /)
        slug = String.replace_prefix(route, "/", "")
        if Pages.get_page(slug), do: :ok, else: {:error, "page not found"}
    end
  end

  @doc """
  Checks whether an external URL is reachable.

  Uses HTTP HEAD with a fallback to GET when the server answers 405 or 501
  (some servers reject HEAD). Any final status in 200..399 counts as healthy.

  Returns `:ok`, `{:error, status, reason}`, or `{:error, reason}`.
  """
  def check_external(url) do
    req = build_req()

    case Req.head(req, url: url) do
      {:ok, %{status: status}} when status in @head_unsupported ->
        # HEAD not allowed, try GET
        req |> Req.get(url: url) |> interpret_response()

      response ->
        interpret_response(response)
    end
  end

  # Only the path decides whether an internal link resolves; the query string
  # and fragment would otherwise end up inside the slug and force a miss.
  defp strip_query_and_fragment(path) do
    path
    |> String.split(["?", "#"], parts: 2)
    |> hd()
  end

  # Maps a Req result (from HEAD or GET) onto this module's return contract.
  defp interpret_response({:ok, %{status: status}}) when status in 200..399, do: :ok

  defp interpret_response({:ok, %{status: status}}),
    do: {:error, status, status_reason(status)}

  defp interpret_response({:error, %{reason: reason}}), do: {:error, format_error(reason)}
  defp interpret_response({:error, reason}), do: {:error, format_error(reason)}

  # Base request: 5s connect and receive timeouts, no retries, up to 5 redirects.
  defp build_req do
    Req.new(
      connect_options: [timeout: 5_000],
      receive_timeout: 5_000,
      max_retries: 0,
      redirect: true,
      max_redirects: 5,
      headers: [{"user-agent", "Berrypod Link Checker/1.0"}]
    )
    |> maybe_attach_test_plug()
  end

  # Lets configuration inject a plug (via the `:link_checker_plug` application
  # env key) so requests can be served without touching the network.
  defp maybe_attach_test_plug(req) do
    case Application.get_env(:berrypod, :link_checker_plug) do
      nil -> req
      false -> req
      plug -> Req.merge(req, plug: plug)
    end
  end

  defp status_reason(404), do: "not found"
  defp status_reason(403), do: "forbidden"
  defp status_reason(410), do: "gone"
  defp status_reason(500), do: "server error"
  defp status_reason(502), do: "bad gateway"
  defp status_reason(503), do: "service unavailable"
  defp status_reason(status), do: "HTTP #{status}"

  defp format_error(:timeout), do: "timeout"
  defp format_error(:econnrefused), do: "connection refused"
  defp format_error(:nxdomain), do: "DNS lookup failed"
  defp format_error(:closed), do: "connection closed"
  defp format_error(%Mint.TransportError{reason: reason}), do: format_error(reason)
  defp format_error(reason) when is_atom(reason), do: to_string(reason)
  defp format_error(reason), do: inspect(reason)
end
|
||||
189
lib/berrypod/redirects/link_scanner.ex
Normal file
189
lib/berrypod/redirects/link_scanner.ex
Normal file
@@ -0,0 +1,189 @@
|
||||
defmodule Berrypod.Redirects.LinkScanner do
  @moduledoc """
  Extracts outgoing URLs from page blocks and navigation items.

  Used by the dead link checker to discover all links in site content,
  and by the admin UI to show where a broken URL is used.

  URLs are trimmed of surrounding whitespace before anything else. Empty
  values, in-page anchors (`#...`), `mailto:` and `tel:` links and template
  placeholders (`{{...`) are skipped.
  """

  alias Berrypod.Pages
  alias Berrypod.Settings

  # Block settings keys that contain URLs
  @url_keys ~w(cta_href secondary_cta_href link_href href url)

  # Navigation menus; each is stored under the "<location>_nav" setting.
  @nav_locations ~w(header footer)

  @doc """
  Scans all pages and navigation for URLs.

  Returns a list of `%{url: string, type: :internal | :external, sources: [source]}`.
  URLs are deduplicated — each unique URL appears once with all its sources.
  """
  def scan_all do
    # Combine the raw (ungrouped) links: the grouped output of `scan_nav/0`
    # has `:sources` instead of `:source` and cannot be grouped a second time.
    group_by_url(scan_all_pages() ++ nav_links())
  end

  @doc """
  Scans a single page's blocks for URLs.

  Returns a list of `%{url: string, type: :internal | :external, sources: [source]}`.
  """
  def scan_page(page) do
    page
    |> extract_page_links()
    |> group_by_url()
  end

  @doc """
  Scans navigation items for URLs.

  Returns a list of `%{url: string, type: :internal | :external, sources: [source]}`.
  """
  def scan_nav do
    group_by_url(nav_links())
  end

  @doc """
  Finds all sources (pages, nav items) where a specific URL is used.

  Stored values are compared after trimming, the same way the scanner
  normalises them, so a link saved with stray whitespace is still found.

  Returns a list of `%{type: string, id: string, label: string, edit_path: string}`.
  """
  def find_sources(url) do
    page_sources =
      for page <- Pages.list_all_pages(),
          block <- page[:blocks] || [],
          settings = block["settings"] || %{},
          key <- @url_keys,
          same_url?(settings[key], url) do
        page_source(page, block)
      end

    page_sources ++ find_nav_sources(url)
  end

  # ── Private ──────────────────────────────────────────────────────

  defp scan_all_pages do
    Enum.flat_map(Pages.list_all_pages(), &extract_page_links/1)
  end

  # One `%{url, type, source}` entry per URL-bearing setting in the page's blocks.
  defp extract_page_links(page) do
    for block <- page[:blocks] || [],
        settings = block["settings"] || %{},
        key <- @url_keys,
        url = extract_url(settings[key]) do
      %{url: url, type: classify(url), source: page_source(page, block)}
    end
  end

  # One `%{url, type, source}` entry per nav item with a checkable href,
  # header items first.
  defp nav_links do
    for location <- @nav_locations,
        item <- load_nav_items("#{location}_nav"),
        url = extract_url(item["href"]) do
      %{url: url, type: classify(url), source: nav_source(location, item)}
    end
  end

  # Trim first so padded values are filtered like their unpadded forms and a
  # whitespace-only value is treated as empty.
  defp extract_url(value) when is_binary(value) do
    case String.trim(value) do
      "" -> nil
      "#" <> _ -> nil
      "mailto:" <> _ -> nil
      "tel:" <> _ -> nil
      "{{" <> _ -> nil
      url -> url
    end
  end

  defp extract_url(_), do: nil

  # True when a stored setting/href value refers to `url` once trimmed.
  defp same_url?(value, url) when is_binary(value), do: String.trim(value) == url
  defp same_url?(_value, _url), do: false

  # Anything that does not start with "/" is treated as external.
  defp classify("/" <> _), do: :internal
  defp classify(_), do: :external

  defp page_source(page, block) do
    block_type = block["type"] || "unknown"
    page_title = page[:title] || page[:slug] || "Unknown"

    %{
      type: "page_block",
      id: to_string(page[:slug]),
      label: "#{page_title} — #{block_type}",
      edit_path: "/admin/pages/#{page[:slug]}"
    }
  end

  defp nav_source(location, item) do
    label = item["label"] || "Unknown"

    %{
      type: "nav_item",
      id: "#{location}_nav",
      label: "#{String.capitalize(location)} nav — #{label}",
      edit_path: "/admin/navigation"
    }
  end

  defp find_nav_sources(url) do
    for location <- @nav_locations,
        item <- load_nav_items("#{location}_nav"),
        same_url?(item["href"], url) do
      nav_source(location, item)
    end
  end

  # A setting that is missing or not a list is treated as an empty menu.
  defp load_nav_items(key) do
    case Settings.get_setting(key) do
      items when is_list(items) -> items
      _ -> []
    end
  end

  # Collapses raw link entries into one entry per URL carrying every source.
  defp group_by_url(links) do
    links
    |> Enum.group_by(& &1.url)
    |> Enum.map(fn {url, [first | _] = entries} ->
      %{url: url, type: first.type, sources: Enum.map(entries, & &1.source)}
    end)
  end
end
|
||||
164
lib/berrypod/workers/dead_link_checker_worker.ex
Normal file
164
lib/berrypod/workers/dead_link_checker_worker.ex
Normal file
@@ -0,0 +1,164 @@
|
||||
defmodule Berrypod.Workers.DeadLinkCheckerWorker do
  @moduledoc """
  Daily Oban cron job that scans all content for broken links.

  Can also be run on-demand for a single page via `new(%{"page_slug" => slug})`
  or for a single URL via `new(%{"check_url" => url})`. Any other args trigger
  a full scan.

  After every run a `:dead_links_changed` message is broadcast on the
  `"redirects"` PubSub topic.
  """

  use Oban.Worker, queue: :default, max_attempts: 1

  require Logger

  alias Berrypod.Redirects
  alias Berrypod.Redirects.{LinkChecker, LinkScanner}

  @impl Oban.Worker
  def perform(%Oban.Job{args: %{"page_slug" => slug}}) do
    check_page(slug)
    broadcast_changed()
    :ok
  end

  def perform(%Oban.Job{args: %{"check_url" => url}}) do
    check_url(url)
    broadcast_changed()
    :ok
  end

  def perform(_job) do
    check_all()
    broadcast_changed()
    :ok
  end

  @doc """
  Runs a full scan of all content links.

  Each unique URL is checked once. Broken URLs are recorded, healthy ones have
  any previous record cleared, and records for URLs that no longer appear in
  content are pruned. Logs a warning when at least one broken link is found.
  Always returns `:ok`.
  """
  def check_all do
    links = LinkScanner.scan_all()

    broken_count =
      links
      |> Enum.uniq_by(& &1.url)
      |> Enum.count(&(check_and_record(&1) == :broken))

    # Remove dead_links for URLs no longer in content
    Redirects.prune_orphan_dead_links(Enum.map(links, & &1.url))

    if broken_count > 0 do
      Logger.warning("Dead link check: #{broken_count} broken link(s) found")
    end

    :ok
  end

  @doc """
  Checks links for a single page.

  Does nothing when no page with the given slug exists. Always returns `:ok`.
  """
  def check_page(slug) do
    case Berrypod.Pages.get_page(slug) do
      nil ->
        :ok

      page ->
        page
        |> LinkScanner.scan_page()
        |> Enum.uniq_by(& &1.url)
        |> Enum.each(&check_and_record/1)
    end
  end

  @doc """
  Re-checks a single URL and updates its dead_link record.

  URLs starting with `/` are checked as internal links, everything else as
  external. Always returns `:ok`.
  """
  def check_url(url) do
    type = if String.starts_with?(url, "/"), do: :internal, else: :external
    check_and_record(%{url: url, type: type})
    :ok
  end

  defp broadcast_changed do
    Phoenix.PubSub.broadcast(Berrypod.PubSub, "redirects", :dead_links_changed)
  end

  # Checks the link exactly once and stores the outcome, so the recorded
  # status and error always come from the same check that marked it broken
  # (and external URLs are not requested twice). Returns `:healthy` or `:broken`.
  defp check_and_record(%{url: url, type: type} = link) do
    case check(link) do
      :ok ->
        # Clear any previously-broken record now that the link is healthy
        Redirects.clear_healthy_link(url)
        :healthy

      {:broken, http_status, error} ->
        attrs = %{
          url: url,
          url_type: to_string(type),
          status: "broken",
          http_status: http_status,
          error: error
        }

        case Redirects.upsert_dead_link(attrs) do
          {:ok, _dead_link} ->
            :ok

          {:error, reason} ->
            Logger.warning("Dead link check: could not record #{url}: #{inspect(reason)}")
        end

        :broken
    end
  end

  # Normalises both checker results to `:ok | {:broken, http_status, error}`.
  defp check(%{type: :internal, url: url}) do
    case LinkChecker.check_internal(url) do
      :ok -> :ok
      {:error, reason} -> {:broken, nil, reason}
    end
  end

  defp check(%{type: :external, url: url}) do
    case LinkChecker.check_external(url) do
      :ok -> :ok
      {:error, status, reason} -> {:broken, status, reason}
      {:error, reason} -> {:broken, nil, reason}
    end
  end
end
|
||||
Reference in New Issue
Block a user