From 162a5bfe9a3addbf2add30e41e01df019516c34f Mon Sep 17 00:00:00 2001 From: jamey Date: Mon, 23 Feb 2026 14:48:50 +0000 Subject: [PATCH] replace analytics double-count prevention with buffer supersede MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Plug records a pageview with a known ID (plug_ref) into the ETS buffer. When JS connects, the LiveView hook supersedes that event by ID and records its own with full data (screen_size from connect params). If JS never connects, the Plug's event flushes normally after 10s. Also fixes: admin browsing no longer leaks product_view events — the Plug now sets no analytics session data for admins, so all downstream visitor_hash guards naturally filter them out. Replaces the previous time-based skip logic which was brittle and race-prone. The supersede approach is deterministic and handles both the ETS buffer and already-flushed DB cases. Co-Authored-By: Claude Opus 4.6 --- assets/js/app.js | 2 +- lib/berrypod/analytics/buffer.ex | 39 +++++++- lib/berrypod_web/analytics_hook.ex | 107 ++++++++++++--------- lib/berrypod_web/plugs/analytics.ex | 38 ++++---- test/berrypod/analytics/buffer_test.exs | 100 +++++++++++++++++++ test/berrypod_web/plugs/analytics_test.exs | 13 ++- 6 files changed, 231 insertions(+), 68 deletions(-) diff --git a/assets/js/app.js b/assets/js/app.js index 51a24ca..5da03cc 100644 --- a/assets/js/app.js +++ b/assets/js/app.js @@ -591,7 +591,7 @@ const ChartTooltip = { const csrfToken = document.querySelector("meta[name='csrf-token']").getAttribute("content") const liveSocket = new LiveSocket("/live", Socket, { - params: {_csrf_token: csrfToken}, + params: {_csrf_token: csrfToken, screen_width: window.innerWidth}, hooks: {...colocatedHooks, ColorSync, Lightbox, CartPersist, CartDrawer, ProductImageScroll, SearchModal, CollectionFilters, CardRadioScroll, AnalyticsInit, ChartTooltip}, }) diff --git a/lib/berrypod/analytics/buffer.ex b/lib/berrypod/analytics/buffer.ex index f3e549f..12aab57 100644 --- a/lib/berrypod/analytics/buffer.ex +++ b/lib/berrypod/analytics/buffer.ex @@ -55,6 +55,20 @@ defmodule Berrypod.Analytics.Buffer do GenServer.cast(__MODULE__, {:record, attrs}) end + @doc """ + Removes the Plug's initial pageview so the hook can replace it. + + Looks in the ETS buffer first. If the event was already flushed to the DB + (the 10s timer can fire between Plug record and hook connect), deletes it + from there instead. Either way, the hook then records its own event with + full data (screen_size etc.). + + If JS never connects, the Plug's event stays and flushes normally. + """ + def supersede(ref) when is_binary(ref) do + GenServer.cast(__MODULE__, {:supersede, ref}) + end + # ── GenServer callbacks ── @impl true @@ -75,7 +89,7 @@ defmodule Berrypod.Analytics.Buffer do event = attrs |> Map.put(:session_hash, session_hash) - |> Map.put(:id, Ecto.UUID.generate()) + |> Map.put_new(:id, Ecto.UUID.generate()) |> Map.put(:inserted_at, DateTime.truncate(now, :second)) counter = state.counter + 1 @@ -84,6 +98,29 @@ defmodule Berrypod.Analytics.Buffer do {:noreply, %{state | counter: counter, sessions: sessions}} end + @impl true + def handle_cast({:supersede, ref}, state) do + found_in_ets = + state.table + |> :ets.tab2list() + |> Enum.reduce(false, fn {key, event}, found -> + if Map.get(event, :id) == ref do + :ets.delete(state.table, key) + true + else + found + end + end) + + # If the 10s flush already moved it to the DB, delete it there + unless found_in_ets do + import Ecto.Query + Repo.delete_all(from(e in Event, where: e.id == ^ref)) + end + + {:noreply, state} + end + @impl true def handle_info(:flush, state) do state = flush_events(state) diff --git a/lib/berrypod_web/analytics_hook.ex b/lib/berrypod_web/analytics_hook.ex index f1dc343..2cf5e7b 100644 --- a/lib/berrypod_web/analytics_hook.ex +++ b/lib/berrypod_web/analytics_hook.ex @@ -1,19 +1,25 @@ defmodule BerrypodWeb.AnalyticsHook do @moduledoc """ - LiveView on_mount hook for analytics — Layer 2 of the progressive pipeline. + LiveView on_mount hook for analytics. - Reads analytics data from the session (set by the Plugs.Analytics plug) and - tracks subsequent SPA navigations via handle_params. Skips the initial mount - since the plug already recorded that pageview. + Reads session data prepared by the Plugs.Analytics plug (visitor hash, + browser, OS, referrer, UTMs) and records pageviews for SPA navigations. - Also handles the `analytics:screen` event from the JS hook (Layer 3) to - capture screen width for device classification. + The Plug records an initial pageview into the ETS buffer with a unique + `plug_ref`. When JS connects, this hook tells the buffer to drop that + event (by ref) and records its own pageview with full data (screen_size). + If JS never connects (no-JS user), the Plug's event flushes to the DB + after the normal 10-second buffer interval. No timing heuristics needed. + + Screen width is read from the LiveSocket connect params, so it's available + on every LiveView mount without relying on a separate JS hook event. """ import Phoenix.Component, only: [assign: 3] - import Phoenix.LiveView, only: [attach_hook: 4, connected?: 1] + import Phoenix.LiveView, only: [attach_hook: 4, connected?: 1, get_connect_params: 1] alias Berrypod.Analytics + alias Berrypod.Analytics.Buffer def on_mount(:track, _params, session, socket) do socket = @@ -26,56 +32,71 @@ defmodule BerrypodWeb.AnalyticsHook do |> assign(:analytics_utm_source, session["analytics_utm_source"]) |> assign(:analytics_utm_medium, session["analytics_utm_medium"]) |> assign(:analytics_utm_campaign, session["analytics_utm_campaign"]) - |> assign(:analytics_screen_size, session["analytics_screen_size"]) |> assign(:analytics_country_code, session["country_code"]) - |> assign(:analytics_initial_mount, true) + |> assign(:analytics_plug_ref, session["analytics_plug_ref"]) socket = - if connected?(socket) and socket.assigns.analytics_visitor_hash do + if connected?(socket) do + # Supersede the Plug's buffered event — we'll record our own with screen_size + if plug_ref = socket.assigns[:analytics_plug_ref] do + Buffer.supersede(plug_ref) + end + + screen_size = screen_size_from_connect_params(socket) + socket - |> attach_hook(:analytics_params, :handle_params, &handle_analytics_params/3) + |> assign(:analytics_screen_size, screen_size) + |> assign(:analytics_plug_ref, nil) |> attach_hook(:analytics_events, :handle_event, &handle_analytics_event/3) + |> then(fn s -> + if s.assigns.analytics_visitor_hash do + attach_hook(s, :analytics_params, :handle_params, &handle_analytics_params/3) + else + s + end + end) else - socket + assign(socket, :analytics_screen_size, nil) end {:cont, socket} end + defp screen_size_from_connect_params(socket) do + case get_connect_params(socket) do + %{"screen_width" => width} when is_integer(width) -> classify_screen(width) + _ -> nil + end + end + defp handle_analytics_params(_params, uri, socket) do - # Skip the initial mount — the plug already recorded this pageview - if socket.assigns.analytics_initial_mount do - {:cont, assign(socket, :analytics_initial_mount, false)} + if admin?(socket) do + {:cont, socket} else - # Skip if admin user is browsing - if admin?(socket) do - {:cont, socket} - else - pathname = URI.parse(uri).path + pathname = URI.parse(uri).path - Analytics.track_pageview(%{ - pathname: pathname, - visitor_hash: socket.assigns.analytics_visitor_hash, - referrer: socket.assigns.analytics_referrer, - referrer_source: socket.assigns.analytics_referrer_source, - utm_source: socket.assigns.analytics_utm_source, - utm_medium: socket.assigns.analytics_utm_medium, - utm_campaign: socket.assigns.analytics_utm_campaign, - country_code: socket.assigns.analytics_country_code, - screen_size: socket.assigns.analytics_screen_size, - browser: socket.assigns.analytics_browser, - os: socket.assigns.analytics_os - }) + Analytics.track_pageview(%{ + pathname: pathname, + visitor_hash: socket.assigns.analytics_visitor_hash, + referrer: socket.assigns.analytics_referrer, + referrer_source: socket.assigns.analytics_referrer_source, + utm_source: socket.assigns.analytics_utm_source, + utm_medium: socket.assigns.analytics_utm_medium, + utm_campaign: socket.assigns.analytics_utm_campaign, + country_code: socket.assigns.analytics_country_code, + screen_size: socket.assigns.analytics_screen_size, + browser: socket.assigns.analytics_browser, + os: socket.assigns.analytics_os + }) - # Clear referrer/UTMs after first SPA navigation — they only apply to the entry - {:cont, - socket - |> assign(:analytics_referrer, nil) - |> assign(:analytics_referrer_source, nil) - |> assign(:analytics_utm_source, nil) - |> assign(:analytics_utm_medium, nil) - |> assign(:analytics_utm_campaign, nil)} - end + # Clear referrer/UTMs after first tracked navigation — they only apply to the entry + {:cont, + socket + |> assign(:analytics_referrer, nil) + |> assign(:analytics_referrer_source, nil) + |> assign(:analytics_utm_source, nil) + |> assign(:analytics_utm_medium, nil) + |> assign(:analytics_utm_campaign, nil)} end end @@ -96,7 +117,7 @@ defmodule BerrypodWeb.AnalyticsHook do defp handle_analytics_event("analytics:screen", %{"width" => width}, socket) when is_integer(width) do screen_size = classify_screen(width) - {:cont, assign(socket, :analytics_screen_size, screen_size)} + {:halt, assign(socket, :analytics_screen_size, screen_size)} end defp handle_analytics_event(_event, _params, socket), do: {:cont, socket} diff --git a/lib/berrypod_web/plugs/analytics.ex b/lib/berrypod_web/plugs/analytics.ex index 127bfbf..05d9bca 100644 --- a/lib/berrypod_web/plugs/analytics.ex +++ b/lib/berrypod_web/plugs/analytics.ex @@ -1,14 +1,16 @@ defmodule BerrypodWeb.Plugs.Analytics do @moduledoc """ - Plug that records a pageview event on every shop HTTP request. + Plug that records an initial pageview and prepares analytics session data. - This is Layer 1 of the progressive analytics pipeline — it fires on every - GET request regardless of whether JS is enabled. Computes a privacy-friendly - visitor hash, parses the user agent, extracts referrer and UTM params, and - buffers the event for batch writing to SQLite. + Fires on every GET request regardless of JS — computes a privacy-friendly + visitor hash, parses the user agent, extracts referrer and UTM params. + Records the pageview immediately (for no-JS support) and stores the data + in the session for the LiveView hook to use on SPA navigations. - Also stores analytics data in the session so the LiveView hook (Layer 2) - can read it for subsequent SPA navigations. + The event is recorded with a known ID (plug_ref) stored in the session. + When JS connects, the LiveView hook supersedes this event by ID and + records its own with full data (screen_size). If JS never connects, + the Plug's event flushes to the DB normally. """ import Plug.Conn @@ -26,9 +28,11 @@ defmodule BerrypodWeb.Plugs.Analytics do if browser == "Bot" do conn else - # Skip if the logged-in admin is browsing + # Skip recording for logged-in admin — don't set analytics session + # data either, so downstream guards (visitor_hash checks in LiveViews) + # naturally filter out admin browsing for all event types if admin?(conn) do - prepare_session(conn, ua, browser, os) + conn else record_and_prepare(conn, ua, browser, os) end @@ -42,9 +46,10 @@ defmodule BerrypodWeb.Plugs.Analytics do {referrer, referrer_source} = extract_referrer(conn) utm = extract_utms(conn) country_code = get_session(conn, "country_code") - screen_size = get_session(conn, "analytics_screen_size") + plug_ref = Ecto.UUID.generate() Analytics.track_pageview(%{ + id: plug_ref, pathname: conn.request_path, visitor_hash: visitor_hash, referrer: referrer, @@ -53,7 +58,7 @@ defmodule BerrypodWeb.Plugs.Analytics do utm_medium: utm.medium, utm_campaign: utm.campaign, country_code: country_code, - screen_size: screen_size, + screen_size: nil, browser: browser, os: os }) @@ -67,16 +72,7 @@ defmodule BerrypodWeb.Plugs.Analytics do |> put_session("analytics_utm_source", utm.source) |> put_session("analytics_utm_medium", utm.medium) |> put_session("analytics_utm_campaign", utm.campaign) - end - - # Store session data for the hook even when we skip recording for admins - defp prepare_session(conn, ua, browser, os) do - visitor_hash = Salt.hash_visitor(conn.remote_ip, ua) - - conn - |> put_session("analytics_visitor_hash", visitor_hash) - |> put_session("analytics_browser", browser) - |> put_session("analytics_os", os) + |> put_session("analytics_plug_ref", plug_ref) end defp admin?(conn) do diff --git a/test/berrypod/analytics/buffer_test.exs b/test/berrypod/analytics/buffer_test.exs index eb9acc3..721b48b 100644 --- a/test/berrypod/analytics/buffer_test.exs +++ b/test/berrypod/analytics/buffer_test.exs @@ -84,4 +84,104 @@ defmodule Berrypod.Analytics.BufferTest do assert length(session_hashes) == 2 end end + + describe "supersede/1" do + test "removes a buffered event by id before flush" do + visitor_hash = :crypto.strong_rand_bytes(8) + ref = Ecto.UUID.generate() + + Buffer.record(%{ + id: ref, + name: "pageview", + pathname: "/", + visitor_hash: visitor_hash + }) + + Buffer.supersede(ref) + + send(Buffer, :flush) + :timer.sleep(50) + + events = + from(e in Event, where: e.visitor_hash == ^visitor_hash) + |> Repo.all() + + assert events == [] + end + + test "only removes the event with the matching id" do + visitor_hash = :crypto.strong_rand_bytes(8) + ref = Ecto.UUID.generate() + + Buffer.record(%{ + id: ref, + name: "pageview", + pathname: "/", + visitor_hash: visitor_hash + }) + + Buffer.record(%{ + name: "pageview", + pathname: "/about", + visitor_hash: visitor_hash + }) + + Buffer.supersede(ref) + + send(Buffer, :flush) + :timer.sleep(50) + + events = + from(e in Event, where: e.visitor_hash == ^visitor_hash) + |> Repo.all() + + assert length(events) == 1 + assert hd(events).pathname == "/about" + end + + test "removes from DB if event was already flushed" do + visitor_hash = :crypto.strong_rand_bytes(8) + ref = Ecto.UUID.generate() + + Buffer.record(%{ + id: ref, + name: "pageview", + pathname: "/", + visitor_hash: visitor_hash + }) + + # Flush to DB first, simulating the 10s timer firing before supersede + send(Buffer, :flush) + :timer.sleep(50) + + assert Repo.aggregate(from(e in Event, where: e.id == ^ref), :count) == 1 + + # Now supersede — should delete from DB + Buffer.supersede(ref) + :timer.sleep(50) + + assert Repo.aggregate(from(e in Event, where: e.id == ^ref), :count) == 0 + end + + test "no-op when ref does not match any buffered event" do + visitor_hash = :crypto.strong_rand_bytes(8) + + Buffer.record(%{ + name: "pageview", + pathname: "/", + visitor_hash: visitor_hash + }) + + Buffer.supersede(Ecto.UUID.generate()) + + send(Buffer, :flush) + :timer.sleep(50) + + events = + from(e in Event, where: e.visitor_hash == ^visitor_hash) + |> Repo.all() + + assert length(events) == 1 + end + end end diff --git a/test/berrypod_web/plugs/analytics_test.exs b/test/berrypod_web/plugs/analytics_test.exs index 6c4388d..82dd39e 100644 --- a/test/berrypod_web/plugs/analytics_test.exs +++ b/test/berrypod_web/plugs/analytics_test.exs @@ -42,8 +42,6 @@ defmodule BerrypodWeb.Plugs.AnalyticsTest do :timer.sleep(50) count_after = Repo.aggregate(Event, :count) - # POST to /checkout shouldn't create a pageview event via the plug - # (it may fail with a redirect, but the plug should have skipped) assert count_after == count_before end @@ -71,6 +69,17 @@ defmodule BerrypodWeb.Plugs.AnalyticsTest do assert get_session(conn, "analytics_browser") == "Firefox" end + test "stores plug_ref in session for buffer supersede", %{conn: conn} do + conn = + conn + |> put_req_header("user-agent", "Mozilla/5.0 Chrome/120.0") + |> get(~p"/") + + plug_ref = get_session(conn, "analytics_plug_ref") + assert is_binary(plug_ref) + assert String.length(plug_ref) == 36 + end + test "extracts referrer", %{conn: conn} do conn |> put_req_header("user-agent", "Mozilla/5.0 Chrome/120.0")