diff --git a/config/config.exs b/config/config.exs index faca4fe..8fc2e19 100644 --- a/config/config.exs +++ b/config/config.exs @@ -97,6 +97,7 @@ config :berrypod, Oban, {"0 3 * * *", Berrypod.Analytics.RetentionWorker}, {"0 4 * * *", Berrypod.Orders.AbandonedCartPruneWorker}, {"0 5 * * 1", Berrypod.Workers.RedirectPrunerWorker}, + {"30 3 * * *", Berrypod.Workers.DeadLinkCheckerWorker}, {"0 2 * * *", Berrypod.Newsletter.CleanupWorker}, {"*/5 * * * *", Berrypod.Newsletter.ScheduledCampaignWorker} ]} diff --git a/lib/berrypod/pages.ex b/lib/berrypod/pages.ex index 8f0865d..92f19ae 100644 --- a/lib/berrypod/pages.ex +++ b/lib/berrypod/pages.ex @@ -122,6 +122,7 @@ defmodule Berrypod.Pages do case result do {:ok, page} -> PageCache.invalidate(slug) + enqueue_link_check(slug) {:ok, page} error -> @@ -176,6 +177,7 @@ defmodule Berrypod.Pages do }) end + enqueue_link_check(updated.slug) {:ok, updated} error -> @@ -256,6 +258,10 @@ defmodule Berrypod.Pages do # ── Helpers ─────────────────────────────────────────────────────── + defp enqueue_link_check(slug) do + Oban.insert(Berrypod.Workers.DeadLinkCheckerWorker.new(%{"page_slug" => slug})) + end + defp page_to_map(%Page{} = page) do %{ slug: page.slug, diff --git a/lib/berrypod/redirects.ex b/lib/berrypod/redirects.ex index 72c19c4..cda6bc6 100644 --- a/lib/berrypod/redirects.ex +++ b/lib/berrypod/redirects.ex @@ -9,7 +9,7 @@ defmodule Berrypod.Redirects do import Ecto.Query alias Berrypod.Repo - alias Berrypod.Redirects.{Redirect, BrokenUrl} + alias Berrypod.Redirects.{Redirect, BrokenUrl, DeadLink} @table :redirects_cache @pubsub_topic "redirects" @@ -407,4 +407,103 @@ defmodule Berrypod.Redirects do {:ok, count} end + + # ── Dead links ── + + @doc """ + Lists dead links, paginated, ordered by most recent first. + """ + def list_dead_links_paginated(opts \\ []) do + status = opts[:status] || "broken" + + from(d in DeadLink, + where: d.status == ^status, + order_by: [desc: d.last_checked_at] + ) + |> Berrypod.Pagination.paginate(page: opts[:page], per_page: opts[:per_page] || 25) + end + + @doc """ + Counts dead links with status "broken". + """ + def count_dead_links do + Repo.aggregate(from(d in DeadLink, where: d.status == "broken"), :count) + end + + @doc """ + Gets a dead link by ID. + """ + def get_dead_link!(id), do: Repo.get!(DeadLink, id) + + @doc """ + Gets a dead link by URL, or nil. + """ + def get_dead_link_by_url(url) do + Repo.one(from d in DeadLink, where: d.url == ^url) + end + + @doc """ + Creates or updates a dead link record for a broken URL. + """ + def upsert_dead_link(attrs) do + now = DateTime.utc_now() |> DateTime.truncate(:second) + attrs = Map.put(attrs, :last_checked_at, now) + + case get_dead_link_by_url(attrs[:url] || attrs["url"]) do + nil -> + %DeadLink{} + |> DeadLink.changeset(attrs) + |> Repo.insert() + + existing -> + # Don't overwrite an "ignored" status + attrs = + if existing.status == "ignored", + do: Map.delete(attrs, :status), + else: attrs + + existing + |> DeadLink.changeset(attrs) + |> Repo.update() + end + end + + @doc """ + Removes a dead link record (URL is now healthy). + """ + def clear_healthy_link(url) do + case get_dead_link_by_url(url) do + nil -> :ok + %{status: "ignored"} -> :ok + dead_link -> Repo.delete(dead_link) + end + end + + @doc """ + Marks a dead link as ignored. + """ + def ignore_dead_link(%DeadLink{} = dead_link) do + dead_link + |> DeadLink.changeset(%{status: "ignored"}) + |> Repo.update() + end + + @doc """ + Deletes a dead link. + """ + def delete_dead_link(%DeadLink{} = dead_link) do + Repo.delete(dead_link) + end + + @doc """ + Removes dead link records for URLs no longer present in any content. + """ + def prune_orphan_dead_links(current_urls) do + url_set = MapSet.new(current_urls) + + from(d in DeadLink) + |> Repo.all() + |> Enum.filter(fn d -> not MapSet.member?(url_set, d.url) end) + |> Enum.each(&Repo.delete/1) + end end diff --git a/lib/berrypod/redirects/dead_link.ex b/lib/berrypod/redirects/dead_link.ex new file mode 100644 index 0000000..02981ff --- /dev/null +++ b/lib/berrypod/redirects/dead_link.ex @@ -0,0 +1,30 @@ +defmodule Berrypod.Redirects.DeadLink do + use Ecto.Schema + import Ecto.Changeset + + @primary_key {:id, :binary_id, autogenerate: true} + @foreign_key_type :binary_id + + @url_types ~w(internal external) + @statuses ~w(broken ignored) + + schema "dead_links" do + field :url, :string + field :url_type, :string, default: "external" + field :status, :string, default: "broken" + field :http_status, :integer + field :error, :string + field :last_checked_at, :utc_datetime + + timestamps() + end + + def changeset(dead_link, attrs) do + dead_link + |> cast(attrs, [:url, :url_type, :status, :http_status, :error, :last_checked_at]) + |> validate_required([:url, :url_type, :last_checked_at]) + |> validate_inclusion(:url_type, @url_types) + |> validate_inclusion(:status, @statuses) + |> unique_constraint(:url) + end +end diff --git a/lib/berrypod/redirects/link_checker.ex b/lib/berrypod/redirects/link_checker.ex new file mode 100644 index 0000000..abae5a0 --- /dev/null +++ b/lib/berrypod/redirects/link_checker.ex @@ -0,0 +1,127 @@ +defmodule Berrypod.Redirects.LinkChecker do + @moduledoc """ + Validates URLs found by the link scanner. + + Internal links are checked against the router and database. + External links are checked via HTTP HEAD requests. + """ + + alias Berrypod.Products + alias Berrypod.Pages + + # Static routes that are always valid + @static_paths ~w( + / /about /contact /delivery /privacy /terms /cart /search /coming-soon + /collections/all + ) + + @doc """ + Checks whether an internal path is valid. + + Returns `:ok` or `{:error, reason}`. + """ + def check_internal(path) do + cond do + path in @static_paths -> + :ok + + match?("/products/" <> _, path) -> + slug = String.replace_prefix(path, "/products/", "") + if Products.get_visible_product(slug), do: :ok, else: {:error, "product not found"} + + match?("/collections/" <> _, path) -> + # Collection routes are always valid (they filter, never 404) + :ok + + true -> + # Could be a custom page — check the slug (strip leading /) + slug = String.replace_prefix(path, "/", "") + + if Pages.get_page(slug) do + :ok + else + {:error, "page not found"} + end + end + end + + @doc """ + Checks whether an external URL is reachable. + + Uses HTTP HEAD with a fallback to GET (some servers reject HEAD). + Returns `:ok`, `{:error, status, reason}`, or `{:error, reason}`. + """ + def check_external(url) do + req = build_req() + + case Req.head(req, url: url) do + {:ok, %{status: status}} when status in 200..399 -> + :ok + + {:ok, %{status: 405}} -> + # HEAD not allowed, try GET + check_external_get(req, url) + + {:ok, %{status: status}} -> + {:error, status, status_reason(status)} + + {:error, %{reason: reason}} -> + {:error, format_error(reason)} + + {:error, reason} -> + {:error, format_error(reason)} + end + end + + defp check_external_get(req, url) do + case Req.get(req, url: url) do + {:ok, %{status: status}} when status in 200..399 -> + :ok + + {:ok, %{status: status}} -> + {:error, status, status_reason(status)} + + {:error, %{reason: reason}} -> + {:error, format_error(reason)} + + {:error, reason} -> + {:error, format_error(reason)} + end + end + + defp build_req do + Req.new( + connect_options: [timeout: 5_000], + receive_timeout: 5_000, + max_retries: 0, + redirect: true, + max_redirects: 5, + headers: [{"user-agent", "Berrypod Link Checker/1.0"}] + ) + |> maybe_attach_test_plug() + end + + defp maybe_attach_test_plug(req) do + if plug = Application.get_env(:berrypod, :link_checker_plug) do + Req.merge(req, plug: plug) + else + req + end + end + + defp status_reason(404), do: "not found" + defp status_reason(403), do: "forbidden" + defp status_reason(410), do: "gone" + defp status_reason(500), do: "server error" + defp status_reason(502), do: "bad gateway" + defp status_reason(503), do: "service unavailable" + defp status_reason(status), do: "HTTP #{status}" + + defp format_error(:timeout), do: "timeout" + defp format_error(:econnrefused), do: "connection refused" + defp format_error(:nxdomain), do: "DNS lookup failed" + defp format_error(:closed), do: "connection closed" + defp format_error(%Mint.TransportError{reason: reason}), do: format_error(reason) + defp format_error(reason) when is_atom(reason), do: to_string(reason) + defp format_error(reason), do: inspect(reason) +end diff --git a/lib/berrypod/redirects/link_scanner.ex b/lib/berrypod/redirects/link_scanner.ex new file mode 100644 index 0000000..858d29d --- /dev/null +++ b/lib/berrypod/redirects/link_scanner.ex @@ -0,0 +1,189 @@ +defmodule Berrypod.Redirects.LinkScanner do + @moduledoc """ + Extracts outgoing URLs from page blocks and navigation items. + + Used by the dead link checker to discover all links in site content, + and by the admin UI to show where a broken URL is used. + """ + + alias Berrypod.Pages + alias Berrypod.Settings + + # Block settings keys that contain URLs + @url_keys ~w(cta_href secondary_cta_href link_href href url) + + @doc """ + Scans all pages and navigation for URLs. + + Returns a list of `%{url: string, type: :internal | :external, sources: [source]}`. + URLs are deduplicated — each unique URL appears once with all its sources. + """ + def scan_all do + page_links = scan_all_pages() + nav_links = scan_nav() + + (page_links ++ nav_links) + |> group_by_url() + end + + @doc """ + Scans a single page's blocks for URLs. + + Returns a list of `%{url: string, type: :internal | :external, sources: [source]}`. + """ + def scan_page(page) do + page + |> extract_page_links() + |> group_by_url() + end + + @doc """ + Scans navigation items for URLs. + + Returns a list of `%{url: string, type: :internal | :external, sources: [source]}`. + """ + def scan_nav do + header_items = load_nav_items("header_nav") + footer_items = load_nav_items("footer_nav") + + header_links = + Enum.flat_map(header_items, fn item -> + case extract_url(item["href"]) do + nil -> [] + url -> [%{url: url, type: classify(url), source: nav_source("header", item)}] + end + end) + + footer_links = + Enum.flat_map(footer_items, fn item -> + case extract_url(item["href"]) do + nil -> [] + url -> [%{url: url, type: classify(url), source: nav_source("footer", item)}] + end + end) + + (header_links ++ footer_links) + |> group_by_url() + end + + @doc """ + Finds all sources (pages, nav items) where a specific URL is used. + + Returns a list of `%{type: string, id: string, label: string, edit_path: string}`. + """ + def find_sources(url) do + page_sources = + Pages.list_all_pages() + |> Enum.flat_map(fn page -> + blocks = page[:blocks] || [] + + blocks + |> Enum.flat_map(fn block -> + settings = block["settings"] || %{} + + @url_keys + |> Enum.filter(fn key -> settings[key] == url end) + |> Enum.map(fn _key -> page_source(page, block) end) + end) + end) + + nav_sources = find_nav_sources(url) + + page_sources ++ nav_sources + end + + # ── Private ────────────────────────────────────────────────────── + + defp scan_all_pages do + Pages.list_all_pages() + |> Enum.flat_map(&extract_page_links/1) + end + + defp extract_page_links(page) do + blocks = page[:blocks] || [] + + Enum.flat_map(blocks, fn block -> + settings = block["settings"] || %{} + + @url_keys + |> Enum.flat_map(fn key -> + case extract_url(settings[key]) do + nil -> [] + url -> [%{url: url, type: classify(url), source: page_source(page, block)}] + end + end) + end) + end + + defp extract_url(nil), do: nil + defp extract_url(""), do: nil + defp extract_url("#" <> _), do: nil + defp extract_url("mailto:" <> _), do: nil + defp extract_url("tel:" <> _), do: nil + defp extract_url("{{" <> _), do: nil + defp extract_url(url) when is_binary(url), do: String.trim(url) + defp extract_url(_), do: nil + + defp classify("/" <> _), do: :internal + defp classify("http" <> _), do: :external + defp classify(_), do: :external + + defp page_source(page, block) do + block_type = block["type"] || "unknown" + page_title = page[:title] || page[:slug] || "Unknown" + + %{ + type: "page_block", + id: to_string(page[:slug]), + label: "#{page_title} — #{block_type}", + edit_path: "/admin/pages/#{page[:slug]}" + } + end + + defp nav_source(location, item) do + label = item["label"] || "Unknown" + + %{ + type: "nav_item", + id: "#{location}_nav", + label: "#{String.capitalize(location)} nav — #{label}", + edit_path: "/admin/navigation" + } + end + + defp find_nav_sources(url) do + header_items = load_nav_items("header_nav") + footer_items = load_nav_items("footer_nav") + + header = + header_items + |> Enum.filter(&(&1["href"] == url)) + |> Enum.map(&nav_source("header", &1)) + + footer = + footer_items + |> Enum.filter(&(&1["href"] == url)) + |> Enum.map(&nav_source("footer", &1)) + + header ++ footer + end + + defp load_nav_items(key) do + case Settings.get_setting(key) do + items when is_list(items) -> items + _ -> [] + end + end + + defp group_by_url(links) do + links + |> Enum.group_by(& &1.url) + |> Enum.map(fn {url, entries} -> + %{ + url: url, + type: hd(entries).type, + sources: Enum.map(entries, & &1.source) + } + end) + end +end diff --git a/lib/berrypod/workers/dead_link_checker_worker.ex b/lib/berrypod/workers/dead_link_checker_worker.ex new file mode 100644 index 0000000..b6c5188 --- /dev/null +++ b/lib/berrypod/workers/dead_link_checker_worker.ex @@ -0,0 +1,164 @@ +defmodule Berrypod.Workers.DeadLinkCheckerWorker do + @moduledoc """ + Daily Oban cron job that scans all content for broken links. + + Can also be run on-demand for a single page via `new(%{"page_slug" => slug})`. + """ + + use Oban.Worker, queue: :default, max_attempts: 1 + + require Logger + + alias Berrypod.Redirects + alias Berrypod.Redirects.{LinkScanner, LinkChecker} + + @impl Oban.Worker + def perform(%Oban.Job{args: %{"page_slug" => slug}}) do + check_page(slug) + broadcast_changed() + :ok + end + + def perform(%Oban.Job{args: %{"check_url" => url}}) do + check_url(url) + broadcast_changed() + :ok + end + + def perform(_job) do + check_all() + broadcast_changed() + :ok + end + + @doc """ + Runs a full scan of all content links. + """ + def check_all do + links = LinkScanner.scan_all() + all_urls = Enum.map(links, & &1.url) + + {broken, healthy} = + links + |> Enum.uniq_by(& &1.url) + |> Enum.split_with(&broken?/1) + + # Record broken links + for link <- broken do + {http_status, error} = check_result(link) + + Redirects.upsert_dead_link(%{ + url: link.url, + url_type: to_string(link.type), + status: "broken", + http_status: http_status, + error: error + }) + end + + # Clear any previously-broken links that are now healthy + for link <- healthy do + Redirects.clear_healthy_link(link.url) + end + + # Remove dead_links for URLs no longer in content + Redirects.prune_orphan_dead_links(all_urls) + + broken_count = length(broken) + + if broken_count > 0 do + Logger.warning("Dead link check: #{broken_count} broken link(s) found") + end + + :ok + end + + @doc """ + Checks links for a single page. + """ + def check_page(slug) do + case Berrypod.Pages.get_page(slug) do + nil -> + :ok + + page -> + links = LinkScanner.scan_page(page) + + for link <- Enum.uniq_by(links, & &1.url) do + if broken?(link) do + {http_status, error} = check_result(link) + + Redirects.upsert_dead_link(%{ + url: link.url, + url_type: to_string(link.type), + status: "broken", + http_status: http_status, + error: error + }) + else + Redirects.clear_healthy_link(link.url) + end + end + + :ok + end + end + + @doc """ + Re-checks a single URL and updates its dead_link record. + """ + def check_url(url) do + type = if String.starts_with?(url, "/"), do: :internal, else: :external + link = %{url: url, type: type} + + if broken?(link) do + {http_status, error} = check_result(link) + + Redirects.upsert_dead_link(%{ + url: url, + url_type: to_string(type), + status: "broken", + http_status: http_status, + error: error + }) + else + Redirects.clear_healthy_link(url) + end + + :ok + end + + defp broadcast_changed do + Phoenix.PubSub.broadcast(Berrypod.PubSub, "redirects", :dead_links_changed) + end + + defp broken?(%{type: :internal, url: url}) do + case LinkChecker.check_internal(url) do + :ok -> false + {:error, _} -> true + end + end + + defp broken?(%{type: :external, url: url}) do + case LinkChecker.check_external(url) do + :ok -> false + {:error, _} -> true + {:error, _, _} -> true + end + end + + defp check_result(%{type: :internal, url: url}) do + case LinkChecker.check_internal(url) do + {:error, reason} -> {nil, reason} + :ok -> {nil, nil} + end + end + + defp check_result(%{type: :external, url: url}) do + case LinkChecker.check_external(url) do + {:error, status, reason} -> {status, reason} + {:error, reason} -> {nil, reason} + :ok -> {nil, nil} + end + end +end diff --git a/lib/berrypod_web/live/admin/redirects.ex b/lib/berrypod_web/live/admin/redirects.ex index 2bbb651..e77c381 100644 --- a/lib/berrypod_web/live/admin/redirects.ex +++ b/lib/berrypod_web/live/admin/redirects.ex @@ -2,8 +2,9 @@ defmodule BerrypodWeb.Admin.Redirects do use BerrypodWeb, :live_view alias Berrypod.Redirects + alias Berrypod.Redirects.LinkScanner - @valid_tabs ~w(redirects broken create) + @valid_tabs ~w(redirects broken dead_links create) @impl true def mount(_params, _session, socket) do @@ -11,14 +12,18 @@ defmodule BerrypodWeb.Admin.Redirects do redirect_page = Redirects.list_redirects_paginated(page: 1) broken_page = Redirects.list_broken_urls_paginated(page: 1) + dead_link_page = Redirects.list_dead_links_paginated(page: 1) socket = socket |> assign(:page_title, "Redirects") |> assign(:redirect_pagination, redirect_page) |> assign(:broken_url_pagination, broken_page) + |> assign(:dead_link_pagination, dead_link_page) + |> assign(:dead_link_count, Redirects.count_dead_links()) |> stream(:redirects, redirect_page.items) |> stream(:broken_urls, broken_page.items) + |> stream(:dead_links, dead_link_page.items) |> assign( :form, to_form(%{"from_path" => "", "to_path" => "", "status_code" => "301"}, as: :redirect) @@ -50,6 +55,13 @@ defmodule BerrypodWeb.Admin.Redirects do |> assign(:broken_url_pagination, page) |> stream(:broken_urls, page.items, reset: true) + "dead_links" -> + page = Redirects.list_dead_links_paginated(page: page_num) + + socket + |> assign(:dead_link_pagination, page) + |> stream(:dead_links, page.items, reset: true) + _ -> socket end @@ -76,6 +88,16 @@ defmodule BerrypodWeb.Admin.Redirects do |> stream(:broken_urls, page.items, reset: true)} end + def handle_info(:dead_links_changed, socket) do + page = Redirects.list_dead_links_paginated(page: socket.assigns.dead_link_pagination.page) + + {:noreply, + socket + |> assign(:dead_link_pagination, page) + |> assign(:dead_link_count, Redirects.count_dead_links()) + |> stream(:dead_links, page.items, reset: true)} + end + @impl true def handle_event("switch_tab", %{"tab" => tab}, socket) do {:noreply, push_patch(socket, to: ~p"/admin/redirects?#{%{tab: tab}}")} @@ -132,6 +154,32 @@ defmodule BerrypodWeb.Admin.Redirects do |> stream(:broken_urls, page.items, reset: true)} end + def handle_event("ignore_dead_link", %{"id" => id}, socket) do + dead_link = Redirects.get_dead_link!(id) + {:ok, _} = Redirects.ignore_dead_link(dead_link) + + page = Redirects.list_dead_links_paginated(page: socket.assigns.dead_link_pagination.page) + + {:noreply, + socket + |> assign(:dead_link_pagination, page) + |> assign(:dead_link_count, Redirects.count_dead_links()) + |> stream(:dead_links, page.items, reset: true)} + end + + def handle_event("recheck_dead_link", %{"id" => id}, socket) do + dead_link = Redirects.get_dead_link!(id) + + Oban.insert(Berrypod.Workers.DeadLinkCheckerWorker.new(%{"check_url" => dead_link.url})) + + {:noreply, put_flash(socket, :info, "Re-checking #{dead_link.url}...")} + end + + def handle_event("check_all_links", _params, socket) do + Oban.insert(Berrypod.Workers.DeadLinkCheckerWorker.new(%{})) + {:noreply, put_flash(socket, :info, "Full link check started...")} + end + def handle_event("redirect_broken_url", %{"path" => path}, socket) do socket = socket @@ -164,6 +212,12 @@ defmodule BerrypodWeb.Admin.Redirects do count={@broken_url_pagination.total_count} active={@tab} /> + <.tab_button + tab="dead_links" + label="Dead links" + count={@dead_link_count} + active={@tab} + /> <.tab_button tab="create" label="Create" active={@tab} /> @@ -175,6 +229,10 @@ defmodule BerrypodWeb.Admin.Redirects do <.broken_urls_table streams={@streams} pagination={@broken_url_pagination} /> <% end %> + <%= if @tab == "dead_links" do %> + <.dead_links_table streams={@streams} pagination={@dead_link_pagination} /> + <% end %> + <%= if @tab == "create" do %> <.create_form form={@form} /> <% end %> @@ -287,6 +345,71 @@ defmodule BerrypodWeb.Admin.Redirects do """ end + defp dead_links_table(assigns) do + ~H""" +
No dead links detected.
+ <% else %> +| URL | +Type | +Error | +Used in | +Last checked | ++ |
|---|---|---|---|---|---|
{dead_link.url} |
+ + + {dead_link.url_type} + + | +{format_dead_link_error(dead_link)} | +<.dead_link_sources url={dead_link.url} /> | +{Calendar.strftime(dead_link.last_checked_at, "%d %b %Y %H:%M")} | ++ + + | +