From b235219aeeb309dc015faf2a3bfde85b278a8ae0 Mon Sep 17 00:00:00 2001 From: jamey Date: Sun, 1 Mar 2026 13:00:59 +0000 Subject: [PATCH] add dead link monitoring for outgoing content links Scans page blocks and nav items for broken URLs (internal via DB lookup, external via HTTP HEAD). Daily Oban cron at 03:30, plus on-demand checks when pages are saved. Admin UI tab on redirects page with re-check, ignore, and clickable source links. Co-Authored-By: Claude Opus 4.6 --- config/config.exs | 1 + lib/berrypod/pages.ex | 6 + lib/berrypod/redirects.ex | 101 ++++++++- lib/berrypod/redirects/dead_link.ex | 30 +++ lib/berrypod/redirects/link_checker.ex | 127 +++++++++++ lib/berrypod/redirects/link_scanner.ex | 189 ++++++++++++++++ .../workers/dead_link_checker_worker.ex | 164 ++++++++++++++ lib/berrypod_web/live/admin/redirects.ex | 155 ++++++++++++- .../20260301100229_create_dead_links.exs | 20 ++ test/berrypod/redirects/link_checker_test.exs | 111 ++++++++++ test/berrypod/redirects/link_scanner_test.exs | 207 ++++++++++++++++++ 11 files changed, 1109 insertions(+), 2 deletions(-) create mode 100644 lib/berrypod/redirects/dead_link.ex create mode 100644 lib/berrypod/redirects/link_checker.ex create mode 100644 lib/berrypod/redirects/link_scanner.ex create mode 100644 lib/berrypod/workers/dead_link_checker_worker.ex create mode 100644 priv/repo/migrations/20260301100229_create_dead_links.exs create mode 100644 test/berrypod/redirects/link_checker_test.exs create mode 100644 test/berrypod/redirects/link_scanner_test.exs diff --git a/config/config.exs b/config/config.exs index faca4fe..8fc2e19 100644 --- a/config/config.exs +++ b/config/config.exs @@ -97,6 +97,7 @@ config :berrypod, Oban, {"0 3 * * *", Berrypod.Analytics.RetentionWorker}, {"0 4 * * *", Berrypod.Orders.AbandonedCartPruneWorker}, {"0 5 * * 1", Berrypod.Workers.RedirectPrunerWorker}, + {"30 3 * * *", Berrypod.Workers.DeadLinkCheckerWorker}, {"0 2 * * *", Berrypod.Newsletter.CleanupWorker}, {"*/5 * * * *", Berrypod.Newsletter.ScheduledCampaignWorker} ]} diff --git a/lib/berrypod/pages.ex b/lib/berrypod/pages.ex index 8f0865d..92f19ae 100644 --- a/lib/berrypod/pages.ex +++ b/lib/berrypod/pages.ex @@ -122,6 +122,7 @@ defmodule Berrypod.Pages do case result do {:ok, page} -> PageCache.invalidate(slug) + enqueue_link_check(slug) {:ok, page} error -> @@ -176,6 +177,7 @@ defmodule Berrypod.Pages do }) end + enqueue_link_check(updated.slug) {:ok, updated} error -> @@ -256,6 +258,10 @@ defmodule Berrypod.Pages do # ── Helpers ─────────────────────────────────────────────────────── + defp enqueue_link_check(slug) do + Oban.insert(Berrypod.Workers.DeadLinkCheckerWorker.new(%{"page_slug" => slug})) + end + defp page_to_map(%Page{} = page) do %{ slug: page.slug, diff --git a/lib/berrypod/redirects.ex b/lib/berrypod/redirects.ex index 72c19c4..cda6bc6 100644 --- a/lib/berrypod/redirects.ex +++ b/lib/berrypod/redirects.ex @@ -9,7 +9,7 @@ defmodule Berrypod.Redirects do import Ecto.Query alias Berrypod.Repo - alias Berrypod.Redirects.{Redirect, BrokenUrl} + alias Berrypod.Redirects.{Redirect, BrokenUrl, DeadLink} @table :redirects_cache @pubsub_topic "redirects" @@ -407,4 +407,103 @@ defmodule Berrypod.Redirects do {:ok, count} end + + # ── Dead links ── + + @doc """ + Lists dead links, paginated, ordered by most recent first. + """ + def list_dead_links_paginated(opts \\ []) do + status = opts[:status] || "broken" + + from(d in DeadLink, + where: d.status == ^status, + order_by: [desc: d.last_checked_at] + ) + |> Berrypod.Pagination.paginate(page: opts[:page], per_page: opts[:per_page] || 25) + end + + @doc """ + Counts dead links with status "broken". + """ + def count_dead_links do + Repo.aggregate(from(d in DeadLink, where: d.status == "broken"), :count) + end + + @doc """ + Gets a dead link by ID. + """ + def get_dead_link!(id), do: Repo.get!(DeadLink, id) + + @doc """ + Gets a dead link by URL, or nil. + """ + def get_dead_link_by_url(url) do + Repo.one(from d in DeadLink, where: d.url == ^url) + end + + @doc """ + Creates or updates a dead link record for a broken URL. + """ + def upsert_dead_link(attrs) do + now = DateTime.utc_now() |> DateTime.truncate(:second) + attrs = Map.put(attrs, :last_checked_at, now) + + case get_dead_link_by_url(attrs[:url] || attrs["url"]) do + nil -> + %DeadLink{} + |> DeadLink.changeset(attrs) + |> Repo.insert() + + existing -> + # Don't overwrite an "ignored" status + attrs = + if existing.status == "ignored", + do: Map.delete(attrs, :status), + else: attrs + + existing + |> DeadLink.changeset(attrs) + |> Repo.update() + end + end + + @doc """ + Removes a dead link record (URL is now healthy). + """ + def clear_healthy_link(url) do + case get_dead_link_by_url(url) do + nil -> :ok + %{status: "ignored"} -> :ok + dead_link -> Repo.delete(dead_link) + end + end + + @doc """ + Marks a dead link as ignored. + """ + def ignore_dead_link(%DeadLink{} = dead_link) do + dead_link + |> DeadLink.changeset(%{status: "ignored"}) + |> Repo.update() + end + + @doc """ + Deletes a dead link. + """ + def delete_dead_link(%DeadLink{} = dead_link) do + Repo.delete(dead_link) + end + + @doc """ + Removes dead link records for URLs no longer present in any content. + """ + def prune_orphan_dead_links(current_urls) do + url_set = MapSet.new(current_urls) + + from(d in DeadLink) + |> Repo.all() + |> Enum.filter(fn d -> not MapSet.member?(url_set, d.url) end) + |> Enum.each(&Repo.delete/1) + end end diff --git a/lib/berrypod/redirects/dead_link.ex b/lib/berrypod/redirects/dead_link.ex new file mode 100644 index 0000000..02981ff --- /dev/null +++ b/lib/berrypod/redirects/dead_link.ex @@ -0,0 +1,30 @@ +defmodule Berrypod.Redirects.DeadLink do + use Ecto.Schema + import Ecto.Changeset + + @primary_key {:id, :binary_id, autogenerate: true} + @foreign_key_type :binary_id + + @url_types ~w(internal external) + @statuses ~w(broken ignored) + + schema "dead_links" do + field :url, :string + field :url_type, :string, default: "external" + field :status, :string, default: "broken" + field :http_status, :integer + field :error, :string + field :last_checked_at, :utc_datetime + + timestamps() + end + + def changeset(dead_link, attrs) do + dead_link + |> cast(attrs, [:url, :url_type, :status, :http_status, :error, :last_checked_at]) + |> validate_required([:url, :url_type, :last_checked_at]) + |> validate_inclusion(:url_type, @url_types) + |> validate_inclusion(:status, @statuses) + |> unique_constraint(:url) + end +end diff --git a/lib/berrypod/redirects/link_checker.ex b/lib/berrypod/redirects/link_checker.ex new file mode 100644 index 0000000..abae5a0 --- /dev/null +++ b/lib/berrypod/redirects/link_checker.ex @@ -0,0 +1,127 @@ +defmodule Berrypod.Redirects.LinkChecker do + @moduledoc """ + Validates URLs found by the link scanner. + + Internal links are checked against the router and database. + External links are checked via HTTP HEAD requests. + """ + + alias Berrypod.Products + alias Berrypod.Pages + + # Static routes that are always valid + @static_paths ~w( + / /about /contact /delivery /privacy /terms /cart /search /coming-soon + /collections/all + ) + + @doc """ + Checks whether an internal path is valid. + + Returns `:ok` or `{:error, reason}`. + """ + def check_internal(path) do + cond do + path in @static_paths -> + :ok + + match?("/products/" <> _, path) -> + slug = String.replace_prefix(path, "/products/", "") + if Products.get_visible_product(slug), do: :ok, else: {:error, "product not found"} + + match?("/collections/" <> _, path) -> + # Collection routes are always valid (they filter, never 404) + :ok + + true -> + # Could be a custom page — check the slug (strip leading /) + slug = String.replace_prefix(path, "/", "") + + if Pages.get_page(slug) do + :ok + else + {:error, "page not found"} + end + end + end + + @doc """ + Checks whether an external URL is reachable. + + Uses HTTP HEAD with a fallback to GET (some servers reject HEAD). + Returns `:ok`, `{:error, status, reason}`, or `{:error, reason}`. + """ + def check_external(url) do + req = build_req() + + case Req.head(req, url: url) do + {:ok, %{status: status}} when status in 200..399 -> + :ok + + {:ok, %{status: 405}} -> + # HEAD not allowed, try GET + check_external_get(req, url) + + {:ok, %{status: status}} -> + {:error, status, status_reason(status)} + + {:error, %{reason: reason}} -> + {:error, format_error(reason)} + + {:error, reason} -> + {:error, format_error(reason)} + end + end + + defp check_external_get(req, url) do + case Req.get(req, url: url) do + {:ok, %{status: status}} when status in 200..399 -> + :ok + + {:ok, %{status: status}} -> + {:error, status, status_reason(status)} + + {:error, %{reason: reason}} -> + {:error, format_error(reason)} + + {:error, reason} -> + {:error, format_error(reason)} + end + end + + defp build_req do + Req.new( + connect_options: [timeout: 5_000], + receive_timeout: 5_000, + max_retries: 0, + redirect: true, + max_redirects: 5, + headers: [{"user-agent", "Berrypod Link Checker/1.0"}] + ) + |> maybe_attach_test_plug() + end + + defp maybe_attach_test_plug(req) do + if plug = Application.get_env(:berrypod, :link_checker_plug) do + Req.merge(req, plug: plug) + else + req + end + end + + defp status_reason(404), do: "not found" + defp status_reason(403), do: "forbidden" + defp status_reason(410), do: "gone" + defp status_reason(500), do: "server error" + defp status_reason(502), do: "bad gateway" + defp status_reason(503), do: "service unavailable" + defp status_reason(status), do: "HTTP #{status}" + + defp format_error(:timeout), do: "timeout" + defp format_error(:econnrefused), do: "connection refused" + defp format_error(:nxdomain), do: "DNS lookup failed" + defp format_error(:closed), do: "connection closed" + defp format_error(%Mint.TransportError{reason: reason}), do: format_error(reason) + defp format_error(reason) when is_atom(reason), do: to_string(reason) + defp format_error(reason), do: inspect(reason) +end diff --git a/lib/berrypod/redirects/link_scanner.ex b/lib/berrypod/redirects/link_scanner.ex new file mode 100644 index 0000000..858d29d --- /dev/null +++ b/lib/berrypod/redirects/link_scanner.ex @@ -0,0 +1,189 @@ +defmodule Berrypod.Redirects.LinkScanner do + @moduledoc """ + Extracts outgoing URLs from page blocks and navigation items. + + Used by the dead link checker to discover all links in site content, + and by the admin UI to show where a broken URL is used. + """ + + alias Berrypod.Pages + alias Berrypod.Settings + + # Block settings keys that contain URLs + @url_keys ~w(cta_href secondary_cta_href link_href href url) + + @doc """ + Scans all pages and navigation for URLs. + + Returns a list of `%{url: string, type: :internal | :external, sources: [source]}`. + URLs are deduplicated — each unique URL appears once with all its sources. + """ + def scan_all do + page_links = scan_all_pages() + nav_links = scan_nav() + + (page_links ++ nav_links) + |> group_by_url() + end + + @doc """ + Scans a single page's blocks for URLs. + + Returns a list of `%{url: string, type: :internal | :external, sources: [source]}`. + """ + def scan_page(page) do + page + |> extract_page_links() + |> group_by_url() + end + + @doc """ + Scans navigation items for URLs. + + Returns a list of `%{url: string, type: :internal | :external, sources: [source]}`. + """ + def scan_nav do + header_items = load_nav_items("header_nav") + footer_items = load_nav_items("footer_nav") + + header_links = + Enum.flat_map(header_items, fn item -> + case extract_url(item["href"]) do + nil -> [] + url -> [%{url: url, type: classify(url), source: nav_source("header", item)}] + end + end) + + footer_links = + Enum.flat_map(footer_items, fn item -> + case extract_url(item["href"]) do + nil -> [] + url -> [%{url: url, type: classify(url), source: nav_source("footer", item)}] + end + end) + + (header_links ++ footer_links) + |> group_by_url() + end + + @doc """ + Finds all sources (pages, nav items) where a specific URL is used. + + Returns a list of `%{type: string, id: string, label: string, edit_path: string}`. + """ + def find_sources(url) do + page_sources = + Pages.list_all_pages() + |> Enum.flat_map(fn page -> + blocks = page[:blocks] || [] + + blocks + |> Enum.flat_map(fn block -> + settings = block["settings"] || %{} + + @url_keys + |> Enum.filter(fn key -> settings[key] == url end) + |> Enum.map(fn _key -> page_source(page, block) end) + end) + end) + + nav_sources = find_nav_sources(url) + + page_sources ++ nav_sources + end + + # ── Private ────────────────────────────────────────────────────── + + defp scan_all_pages do + Pages.list_all_pages() + |> Enum.flat_map(&extract_page_links/1) + end + + defp extract_page_links(page) do + blocks = page[:blocks] || [] + + Enum.flat_map(blocks, fn block -> + settings = block["settings"] || %{} + + @url_keys + |> Enum.flat_map(fn key -> + case extract_url(settings[key]) do + nil -> [] + url -> [%{url: url, type: classify(url), source: page_source(page, block)}] + end + end) + end) + end + + defp extract_url(nil), do: nil + defp extract_url(""), do: nil + defp extract_url("#" <> _), do: nil + defp extract_url("mailto:" <> _), do: nil + defp extract_url("tel:" <> _), do: nil + defp extract_url("{{" <> _), do: nil + defp extract_url(url) when is_binary(url), do: String.trim(url) + defp extract_url(_), do: nil + + defp classify("/" <> _), do: :internal + defp classify("http" <> _), do: :external + defp classify(_), do: :external + + defp page_source(page, block) do + block_type = block["type"] || "unknown" + page_title = page[:title] || page[:slug] || "Unknown" + + %{ + type: "page_block", + id: to_string(page[:slug]), + label: "#{page_title} — #{block_type}", + edit_path: "/admin/pages/#{page[:slug]}" + } + end + + defp nav_source(location, item) do + label = item["label"] || "Unknown" + + %{ + type: "nav_item", + id: "#{location}_nav", + label: "#{String.capitalize(location)} nav — #{label}", + edit_path: "/admin/navigation" + } + end + + defp find_nav_sources(url) do + header_items = load_nav_items("header_nav") + footer_items = load_nav_items("footer_nav") + + header = + header_items + |> Enum.filter(&(&1["href"] == url)) + |> Enum.map(&nav_source("header", &1)) + + footer = + footer_items + |> Enum.filter(&(&1["href"] == url)) + |> Enum.map(&nav_source("footer", &1)) + + header ++ footer + end + + defp load_nav_items(key) do + case Settings.get_setting(key) do + items when is_list(items) -> items + _ -> [] + end + end + + defp group_by_url(links) do + links + |> Enum.group_by(& &1.url) + |> Enum.map(fn {url, entries} -> + %{ + url: url, + type: hd(entries).type, + sources: Enum.map(entries, & &1.source) + } + end) + end +end diff --git a/lib/berrypod/workers/dead_link_checker_worker.ex b/lib/berrypod/workers/dead_link_checker_worker.ex new file mode 100644 index 0000000..b6c5188 --- /dev/null +++ b/lib/berrypod/workers/dead_link_checker_worker.ex @@ -0,0 +1,164 @@ +defmodule Berrypod.Workers.DeadLinkCheckerWorker do + @moduledoc """ + Daily Oban cron job that scans all content for broken links. + + Can also be run on-demand for a single page via `new(%{"page_slug" => slug})`. + """ + + use Oban.Worker, queue: :default, max_attempts: 1 + + require Logger + + alias Berrypod.Redirects + alias Berrypod.Redirects.{LinkScanner, LinkChecker} + + @impl Oban.Worker + def perform(%Oban.Job{args: %{"page_slug" => slug}}) do + check_page(slug) + broadcast_changed() + :ok + end + + def perform(%Oban.Job{args: %{"check_url" => url}}) do + check_url(url) + broadcast_changed() + :ok + end + + def perform(_job) do + check_all() + broadcast_changed() + :ok + end + + @doc """ + Runs a full scan of all content links. + """ + def check_all do + links = LinkScanner.scan_all() + all_urls = Enum.map(links, & &1.url) + + {broken, healthy} = + links + |> Enum.uniq_by(& &1.url) + |> Enum.split_with(&broken?/1) + + # Record broken links + for link <- broken do + {http_status, error} = check_result(link) + + Redirects.upsert_dead_link(%{ + url: link.url, + url_type: to_string(link.type), + status: "broken", + http_status: http_status, + error: error + }) + end + + # Clear any previously-broken links that are now healthy + for link <- healthy do + Redirects.clear_healthy_link(link.url) + end + + # Remove dead_links for URLs no longer in content + Redirects.prune_orphan_dead_links(all_urls) + + broken_count = length(broken) + + if broken_count > 0 do + Logger.warning("Dead link check: #{broken_count} broken link(s) found") + end + + :ok + end + + @doc """ + Checks links for a single page. + """ + def check_page(slug) do + case Berrypod.Pages.get_page(slug) do + nil -> + :ok + + page -> + links = LinkScanner.scan_page(page) + + for link <- Enum.uniq_by(links, & &1.url) do + if broken?(link) do + {http_status, error} = check_result(link) + + Redirects.upsert_dead_link(%{ + url: link.url, + url_type: to_string(link.type), + status: "broken", + http_status: http_status, + error: error + }) + else + Redirects.clear_healthy_link(link.url) + end + end + + :ok + end + end + + @doc """ + Re-checks a single URL and updates its dead_link record. + """ + def check_url(url) do + type = if String.starts_with?(url, "/"), do: :internal, else: :external + link = %{url: url, type: type} + + if broken?(link) do + {http_status, error} = check_result(link) + + Redirects.upsert_dead_link(%{ + url: url, + url_type: to_string(type), + status: "broken", + http_status: http_status, + error: error + }) + else + Redirects.clear_healthy_link(url) + end + + :ok + end + + defp broadcast_changed do + Phoenix.PubSub.broadcast(Berrypod.PubSub, "redirects", :dead_links_changed) + end + + defp broken?(%{type: :internal, url: url}) do + case LinkChecker.check_internal(url) do + :ok -> false + {:error, _} -> true + end + end + + defp broken?(%{type: :external, url: url}) do + case LinkChecker.check_external(url) do + :ok -> false + {:error, _} -> true + {:error, _, _} -> true + end + end + + defp check_result(%{type: :internal, url: url}) do + case LinkChecker.check_internal(url) do + {:error, reason} -> {nil, reason} + :ok -> {nil, nil} + end + end + + defp check_result(%{type: :external, url: url}) do + case LinkChecker.check_external(url) do + {:error, status, reason} -> {status, reason} + {:error, reason} -> {nil, reason} + :ok -> {nil, nil} + end + end +end diff --git a/lib/berrypod_web/live/admin/redirects.ex b/lib/berrypod_web/live/admin/redirects.ex index 2bbb651..e77c381 100644 --- a/lib/berrypod_web/live/admin/redirects.ex +++ b/lib/berrypod_web/live/admin/redirects.ex @@ -2,8 +2,9 @@ defmodule BerrypodWeb.Admin.Redirects do use BerrypodWeb, :live_view alias Berrypod.Redirects + alias Berrypod.Redirects.LinkScanner - @valid_tabs ~w(redirects broken create) + @valid_tabs ~w(redirects broken dead_links create) @impl true def mount(_params, _session, socket) do @@ -11,14 +12,18 @@ defmodule BerrypodWeb.Admin.Redirects do redirect_page = Redirects.list_redirects_paginated(page: 1) broken_page = Redirects.list_broken_urls_paginated(page: 1) + dead_link_page = Redirects.list_dead_links_paginated(page: 1) socket = socket |> assign(:page_title, "Redirects") |> assign(:redirect_pagination, redirect_page) |> assign(:broken_url_pagination, broken_page) + |> assign(:dead_link_pagination, dead_link_page) + |> assign(:dead_link_count, Redirects.count_dead_links()) |> stream(:redirects, redirect_page.items) |> stream(:broken_urls, broken_page.items) + |> stream(:dead_links, dead_link_page.items) |> assign( :form, to_form(%{"from_path" => "", "to_path" => "", "status_code" => "301"}, as: :redirect) @@ -50,6 +55,13 @@ defmodule BerrypodWeb.Admin.Redirects do |> assign(:broken_url_pagination, page) |> stream(:broken_urls, page.items, reset: true) + "dead_links" -> + page = Redirects.list_dead_links_paginated(page: page_num) + + socket + |> assign(:dead_link_pagination, page) + |> stream(:dead_links, page.items, reset: true) + _ -> socket end @@ -76,6 +88,16 @@ defmodule BerrypodWeb.Admin.Redirects do |> stream(:broken_urls, page.items, reset: true)} end + def handle_info(:dead_links_changed, socket) do + page = Redirects.list_dead_links_paginated(page: socket.assigns.dead_link_pagination.page) + + {:noreply, + socket + |> assign(:dead_link_pagination, page) + |> assign(:dead_link_count, Redirects.count_dead_links()) + |> stream(:dead_links, page.items, reset: true)} + end + @impl true def handle_event("switch_tab", %{"tab" => tab}, socket) do {:noreply, push_patch(socket, to: ~p"/admin/redirects?#{%{tab: tab}}")} @@ -132,6 +154,32 @@ defmodule BerrypodWeb.Admin.Redirects do |> stream(:broken_urls, page.items, reset: true)} end + def handle_event("ignore_dead_link", %{"id" => id}, socket) do + dead_link = Redirects.get_dead_link!(id) + {:ok, _} = Redirects.ignore_dead_link(dead_link) + + page = Redirects.list_dead_links_paginated(page: socket.assigns.dead_link_pagination.page) + + {:noreply, + socket + |> assign(:dead_link_pagination, page) + |> assign(:dead_link_count, Redirects.count_dead_links()) + |> stream(:dead_links, page.items, reset: true)} + end + + def handle_event("recheck_dead_link", %{"id" => id}, socket) do + dead_link = Redirects.get_dead_link!(id) + + Oban.insert(Berrypod.Workers.DeadLinkCheckerWorker.new(%{"check_url" => dead_link.url})) + + {:noreply, put_flash(socket, :info, "Re-checking #{dead_link.url}...")} + end + + def handle_event("check_all_links", _params, socket) do + Oban.insert(Berrypod.Workers.DeadLinkCheckerWorker.new(%{})) + {:noreply, put_flash(socket, :info, "Full link check started...")} + end + def handle_event("redirect_broken_url", %{"path" => path}, socket) do socket = socket @@ -164,6 +212,12 @@ defmodule BerrypodWeb.Admin.Redirects do count={@broken_url_pagination.total_count} active={@tab} /> + <.tab_button + tab="dead_links" + label="Dead links" + count={@dead_link_count} + active={@tab} + /> <.tab_button tab="create" label="Create" active={@tab} /> @@ -175,6 +229,10 @@ defmodule BerrypodWeb.Admin.Redirects do <.broken_urls_table streams={@streams} pagination={@broken_url_pagination} /> <% end %> + <%= if @tab == "dead_links" do %> + <.dead_links_table streams={@streams} pagination={@dead_link_pagination} /> + <% end %> + <%= if @tab == "create" do %> <.create_form form={@form} /> <% end %> @@ -287,6 +345,71 @@ defmodule BerrypodWeb.Admin.Redirects do """ end + defp dead_links_table(assigns) do + ~H""" +
+ +
+ + <%= if @pagination.total_count == 0 do %> +

No dead links detected.

+ <% else %> +
+ + + + + + + + + + + + + + + + + + + + + +
URLTypeErrorUsed inLast checked
{dead_link.url} + + {dead_link.url_type} + + {format_dead_link_error(dead_link)}<.dead_link_sources url={dead_link.url} />{Calendar.strftime(dead_link.last_checked_at, "%d %b %Y %H:%M")} + + +
+
+ + <.admin_pagination + page={@pagination} + patch={~p"/admin/redirects"} + params={%{"tab" => "dead_links"}} + /> + <% end %> + """ + end + defp create_form(assigns) do ~H""" <.form for={@form} phx-submit="create_redirect" style="max-width: 32rem;"> @@ -335,6 +458,36 @@ defmodule BerrypodWeb.Admin.Redirects do """ end + defp dead_link_type_colour("internal"), do: "warning" + defp dead_link_type_colour("external"), do: "info" + defp dead_link_type_colour(_), do: "neutral" + + defp format_dead_link_error(%{http_status: status, error: error}) when not is_nil(status) do + "#{status} #{error}" + end + + defp format_dead_link_error(%{error: error}) when not is_nil(error), do: error + defp format_dead_link_error(_), do: "—" + + defp dead_link_sources(assigns) do + assigns = assign(assigns, :sources, LinkScanner.find_sources(assigns.url)) + + ~H""" + <%= case @sources do %> + <% [] -> %> + + <% [source] -> %> + <.link navigate={source.edit_path} class="underline">{source.label} + <% sources -> %> + + <% end %> + """ + end + defp resolve_matching_broken_url(from_path) do case Redirects.get_broken_url_by_path(from_path) do nil -> :ok diff --git a/priv/repo/migrations/20260301100229_create_dead_links.exs b/priv/repo/migrations/20260301100229_create_dead_links.exs new file mode 100644 index 0000000..9189f0e --- /dev/null +++ b/priv/repo/migrations/20260301100229_create_dead_links.exs @@ -0,0 +1,20 @@ +defmodule Berrypod.Repo.Migrations.CreateDeadLinks do + use Ecto.Migration + + def change do + create table(:dead_links, primary_key: false) do + add :id, :binary_id, primary_key: true + add :url, :string, null: false + add :url_type, :string, null: false, default: "external" + add :status, :string, null: false, default: "broken" + add :http_status, :integer + add :error, :string + add :last_checked_at, :utc_datetime, null: false + + timestamps() + end + + create unique_index(:dead_links, [:url]) + create index(:dead_links, [:status]) + end +end diff --git a/test/berrypod/redirects/link_checker_test.exs b/test/berrypod/redirects/link_checker_test.exs new file mode 100644 index 0000000..64940f7 --- /dev/null +++ b/test/berrypod/redirects/link_checker_test.exs @@ -0,0 +1,111 @@ +defmodule Berrypod.Redirects.LinkCheckerTest do + use Berrypod.DataCase, async: true + + alias Berrypod.Redirects.LinkChecker + + describe "check_internal/1" do + test "static routes are valid" do + assert :ok = LinkChecker.check_internal("/") + assert :ok = LinkChecker.check_internal("/about") + assert :ok = LinkChecker.check_internal("/contact") + assert :ok = LinkChecker.check_internal("/delivery") + assert :ok = LinkChecker.check_internal("/privacy") + assert :ok = LinkChecker.check_internal("/terms") + assert :ok = LinkChecker.check_internal("/cart") + assert :ok = LinkChecker.check_internal("/search") + assert :ok = LinkChecker.check_internal("/collections/all") + end + + test "collection routes are always valid" do + assert :ok = LinkChecker.check_internal("/collections/nonexistent") + end + + test "existing product slug is valid" do + conn = Berrypod.ProductsFixtures.provider_connection_fixture() + + Berrypod.Products.create_product(%{ + provider_connection_id: conn.id, + provider_product_id: "checker_test_1", + title: "Test Product", + slug: "test-checker-product", + status: "active", + visible: true, + in_stock: true, + cheapest_price: 1000 + }) + + assert :ok = LinkChecker.check_internal("/products/test-checker-product") + end + + test "nonexistent product slug is broken" do + assert {:error, "product not found"} = + LinkChecker.check_internal("/products/nonexistent-product") + end + + test "existing custom page is valid" do + {:ok, _page} = + Berrypod.Pages.create_custom_page(%{ + slug: "checker-test-page", + title: "Test", + blocks: [] + }) + + assert :ok = LinkChecker.check_internal("/checker-test-page") + end + + test "nonexistent custom page is broken" do + assert {:error, "page not found"} = + LinkChecker.check_internal("/totally-nonexistent-page") + end + end + + describe "check_external/1" do + setup do + # Use a function plug to stub HTTP responses in tests + Application.put_env(:berrypod, :link_checker_plug, &__MODULE__.test_plug/1) + on_exit(fn -> Application.delete_env(:berrypod, :link_checker_plug) end) + :ok + end + + test "healthy URL returns :ok" do + assert :ok = LinkChecker.check_external("http://test/healthy") + end + + test "404 URL returns error with status" do + assert {:error, 404, "not found"} = LinkChecker.check_external("http://test/not-found") + end + + test "500 URL returns error with status" do + assert {:error, 500, "server error"} = + LinkChecker.check_external("http://test/server-error") + end + + test "405 falls back to GET" do + assert :ok = LinkChecker.check_external("http://test/head-not-allowed") + end + + # Test plug that simulates various responses + def test_plug(conn) do + case conn.request_path do + "/healthy" -> + Plug.Conn.send_resp(conn, 200, "OK") + + "/not-found" -> + Plug.Conn.send_resp(conn, 404, "Not Found") + + "/server-error" -> + Plug.Conn.send_resp(conn, 500, "Error") + + "/head-not-allowed" -> + if conn.method == "HEAD" do + Plug.Conn.send_resp(conn, 405, "Method Not Allowed") + else + Plug.Conn.send_resp(conn, 200, "OK") + end + + _ -> + Plug.Conn.send_resp(conn, 404, "Not Found") + end + end + end +end diff --git a/test/berrypod/redirects/link_scanner_test.exs b/test/berrypod/redirects/link_scanner_test.exs new file mode 100644 index 0000000..2ae4608 --- /dev/null +++ b/test/berrypod/redirects/link_scanner_test.exs @@ -0,0 +1,207 @@ +defmodule Berrypod.Redirects.LinkScannerTest do + use Berrypod.DataCase, async: true + + alias Berrypod.Redirects.LinkScanner + alias Berrypod.Pages + + describe "scan_page/1" do + test "extracts URLs from hero block" do + page = %{ + slug: "home", + title: "Home", + blocks: [ + %{ + "type" => "hero", + "settings" => %{ + "cta_href" => "/collections/all", + "secondary_cta_href" => "https://example.com" + } + } + ] + } + + links = LinkScanner.scan_page(page) + urls = Enum.map(links, & &1.url) |> Enum.sort() + + assert "/collections/all" in urls + assert "https://example.com" in urls + end + + test "extracts URLs from button block" do + page = %{ + slug: "test", + title: "Test", + blocks: [ + %{"type" => "button", "settings" => %{"href" => "/about"}} + ] + } + + [link] = LinkScanner.scan_page(page) + assert link.url == "/about" + assert link.type == :internal + end + + test "extracts URLs from image_text block" do + page = %{ + slug: "test", + title: "Test", + blocks: [ + %{"type" => "image_text", "settings" => %{"link_href" => "/products/mug"}} + ] + } + + [link] = LinkScanner.scan_page(page) + assert link.url == "/products/mug" + end + + test "extracts URLs from video_embed block" do + page = %{ + slug: "test", + title: "Test", + blocks: [ + %{ + "type" => "video_embed", + "settings" => %{"url" => "https://youtube.com/watch?v=abc"} + } + ] + } + + [link] = LinkScanner.scan_page(page) + assert link.url == "https://youtube.com/watch?v=abc" + assert link.type == :external + end + + test "skips empty strings, anchors, mailto, tel, and template vars" do + page = %{ + slug: "test", + title: "Test", + blocks: [ + %{"type" => "hero", "settings" => %{"cta_href" => ""}}, + %{"type" => "button", "settings" => %{"href" => "#section"}}, + %{"type" => "button", "settings" => %{"href" => "mailto:hi@example.com"}}, + %{"type" => "button", "settings" => %{"href" => "tel:+441234567890"}}, + %{"type" => "button", "settings" => %{"href" => "{{unsubscribe_url}}"}} + ] + } + + assert LinkScanner.scan_page(page) == [] + end + + test "deduplicates URLs and groups sources" do + page = %{ + slug: "home", + title: "Home", + blocks: [ + %{"type" => "hero", "settings" => %{"cta_href" => "/about"}}, + %{"type" => "button", "settings" => %{"href" => "/about"}} + ] + } + + links = LinkScanner.scan_page(page) + assert length(links) == 1 + + [link] = links + assert link.url == "/about" + assert length(link.sources) == 2 + end + + test "classifies internal and external URLs" do + page = %{ + slug: "test", + title: "Test", + blocks: [ + %{"type" => "button", "settings" => %{"href" => "/about"}}, + %{"type" => "button", "settings" => %{"href" => "https://example.com"}} + ] + } + + links = LinkScanner.scan_page(page) |> Enum.sort_by(& &1.url) + + assert Enum.at(links, 0).type == :internal + assert Enum.at(links, 1).type == :external + end + + test "handles blocks with no settings" do + page = %{ + slug: "test", + title: "Test", + blocks: [ + %{"type" => "spacer"}, + %{"type" => "divider", "settings" => %{}} + ] + } + + assert LinkScanner.scan_page(page) == [] + end + end + + describe "scan_nav/0" do + test "extracts URLs from header and footer nav" do + Berrypod.Settings.put_setting( + "header_nav", + [%{"label" => "Shop", "href" => "/collections/all"}], + "json" + ) + + Berrypod.Settings.put_setting( + "footer_nav", + [%{"label" => "Privacy", "href" => "/privacy"}], + "json" + ) + + links = LinkScanner.scan_nav() + urls = Enum.map(links, & &1.url) + + assert "/collections/all" in urls + assert "/privacy" in urls + end + + test "returns empty when no nav configured" do + assert LinkScanner.scan_nav() == [] + end + end + + describe "find_sources/1" do + test "finds page blocks using a URL" do + # Save a custom page (not system page, avoids ETS cache issues) + {:ok, _page} = + Pages.create_custom_page(%{ + slug: "test-link-page", + title: "Test page", + blocks: [ + %{"type" => "hero", "settings" => %{"cta_href" => "/test-unique-link"}}, + %{"type" => "button", "settings" => %{"href" => "/about"}} + ] + }) + + sources = LinkScanner.find_sources("/test-unique-link") + + assert length(sources) == 1 + [source] = sources + assert source.type == "page_block" + assert source.id == "test-link-page" + assert source.label =~ "hero" + assert source.edit_path == "/admin/pages/test-link-page" + end + + test "finds nav items using a URL" do + Berrypod.Settings.put_setting( + "header_nav", + [%{"label" => "Shop", "href" => "/test-nav-link"}], + "json" + ) + + sources = LinkScanner.find_sources("/test-nav-link") + + assert length(sources) == 1 + [source] = sources + assert source.type == "nav_item" + assert source.label =~ "Header nav" + assert source.edit_path == "/admin/navigation" + end + + test "returns empty for unused URL" do + assert LinkScanner.find_sources("/nonexistent") == [] + end + end +end