add privacy-first analytics with progressive event collection
All checks were successful
deploy / deploy (push) Successful in 3m20s

Three-layer pipeline: Plug for all HTTP requests (no JS needed), LiveView
hook for SPA navigations, JS hook for screen width. ETS-backed buffer
batches writes to SQLite every 10s. Daily-rotating salt for visitor hashing.
Includes admin dashboard with date ranges, visitor trends, top pages,
sources, devices, and e-commerce conversion funnel. Oban cron for 12-month
data retention.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
jamey
2026-02-22 12:50:55 +00:00
parent b0aed4c1d6
commit 2bd2e613c7
29 changed files with 2277 additions and 10 deletions

284
lib/berrypod/analytics.ex Normal file
View File

@@ -0,0 +1,284 @@
defmodule Berrypod.Analytics do
  @moduledoc """
  Privacy-first analytics for the storefront.

  Inspired by Plausible Analytics: no cookies, no personal data stored,
  GDPR-friendly by default. Events are buffered in ETS and flushed to
  SQLite in batches.

  ## Event types

    - `pageview` — recorded by the plug (Layer 1) and LiveView hook (Layer 2)
    - `product_view` — recorded when a product detail page is viewed
    - `add_to_cart` — recorded when an item is added to the cart
    - `checkout_start` — recorded when checkout is initiated
    - `purchase` — recorded when a purchase is completed (includes revenue)

  ## Date ranges

  Every query helper takes a `{start, end}` tuple and filters with a
  half-open range (`inserted_at >= start and inserted_at < end`) — see
  `base_query/1`.
  """

  import Ecto.Query

  alias Berrypod.Repo
  alias Berrypod.Analytics.{Buffer, Event}

  # =====================================================================
  # Event recording
  # =====================================================================

  @doc """
  Records a pageview event via the buffer.

  Expects a map with at minimum `:pathname` and `:visitor_hash`.
  Other fields (referrer, UTMs, browser, os, country_code, screen_size)
  are optional. Returns `:ok` immediately; the write happens on the next
  buffer flush.
  """
  def track_pageview(attrs) when is_map(attrs) do
    attrs
    |> Map.put(:name, "pageview")
    |> Buffer.record()
  end

  @doc """
  Records a named event via the buffer.

  The `name` should be one of: `product_view`, `add_to_cart`,
  `checkout_start`, `purchase`. Returns `:ok` immediately.
  """
  def track_event(name, attrs) when is_binary(name) and is_map(attrs) do
    attrs
    |> Map.put(:name, name)
    |> Buffer.record()
  end

  # =====================================================================
  # Query helpers
  # =====================================================================

  @doc """
  Counts unique visitors in the given date range.

  Uniqueness is by `visitor_hash` across pageview events only.
  """
  def count_visitors(date_range) do
    base_query(date_range)
    |> where([e], e.name == "pageview")
    |> select([e], count(e.visitor_hash, :distinct))
    |> Repo.one()
  end

  @doc """
  Counts total pageviews in the given date range.
  """
  def count_pageviews(date_range) do
    base_query(date_range)
    |> where([e], e.name == "pageview")
    |> select([e], count())
    |> Repo.one()
  end

  @doc """
  Calculates bounce rate as a percentage (0-100).

  Bounce = a session with only one pageview. Returns 0 when there are no
  sessions in the range.
  """
  def bounce_rate(date_range) do
    # One row per session with its pageview count.
    sessions_query =
      base_query(date_range)
      |> where([e], e.name == "pageview")
      |> group_by([e], e.session_hash)
      |> select([e], %{
        session_hash: e.session_hash,
        pageviews: count()
      })

    result =
      from(s in subquery(sessions_query),
        select: %{
          total: count(),
          bounces: sum(fragment("CASE WHEN ? = 1 THEN 1 ELSE 0 END", s.pageviews))
        }
      )
      |> Repo.one()

    # SUM over zero rows is NULL in SQLite, so guard both the zero and
    # nil shapes before dividing.
    case result do
      %{total: 0} -> 0
      %{total: nil} -> 0
      %{total: total, bounces: bounces} -> round(bounces / total * 100)
    end
  end

  @doc """
  Average visit duration in seconds.

  Duration is the span between a session's first and last pageview;
  single-pageview sessions are excluded (`having count() > 1`) since
  their duration would always be 0. Returns 0 when no multi-pageview
  sessions exist in the range.
  """
  def avg_duration(date_range) do
    durations_query =
      base_query(date_range)
      |> where([e], e.name == "pageview")
      |> group_by([e], e.session_hash)
      |> having([e], count() > 1)
      |> select([e], %{
        duration:
          # strftime('%s', ...) converts the stored timestamp to Unix
          # epoch seconds in SQLite; MAX - MIN is the session length.
          fragment(
            "CAST(strftime('%s', MAX(?)) AS INTEGER) - CAST(strftime('%s', MIN(?)) AS INTEGER)",
            e.inserted_at,
            e.inserted_at
          )
      })

    result =
      from(d in subquery(durations_query),
        select: fragment("COALESCE(AVG(?), 0)", d.duration)
      )
      |> Repo.one()

    round(result)
  end

  @doc """
  Daily visitor counts for the trend chart.

  Returns a list of `%{date: date, visitors: integer}` maps ordered by
  date ascending. NOTE(review): `date` is produced by SQLite's `date()`
  through a fragment, so it is most likely an ISO-8601 string
  ("YYYY-MM-DD") rather than a `Date` struct — confirm before matching
  on `~D[...]` in callers.
  """
  def visitors_by_date(date_range) do
    base_query(date_range)
    |> where([e], e.name == "pageview")
    |> group_by([e], fragment("date(?)", e.inserted_at))
    |> select([e], %{
      date: fragment("date(?)", e.inserted_at),
      visitors: count(e.visitor_hash, :distinct)
    })
    |> order_by([e], fragment("date(?)", e.inserted_at))
    |> Repo.all()
  end

  @doc """
  Top pages by unique visitors.

  Returns up to `limit` maps with `:pathname`, `:visitors` (distinct
  `visitor_hash`) and `:pageviews` (total rows), ordered by visitors desc.
  """
  def top_pages(date_range, limit \\ 10) do
    base_query(date_range)
    |> where([e], e.name == "pageview")
    |> group_by([e], e.pathname)
    |> select([e], %{
      pathname: e.pathname,
      visitors: count(e.visitor_hash, :distinct),
      pageviews: count()
    })
    |> order_by([e], desc: count(e.visitor_hash, :distinct))
    |> limit(^limit)
    |> Repo.all()
  end

  @doc """
  Top referrer sources by unique visitors.

  Only events with a non-nil `referrer_source` are counted, so direct
  traffic is excluded.
  """
  def top_sources(date_range, limit \\ 10) do
    base_query(date_range)
    |> where([e], e.name == "pageview")
    |> where([e], not is_nil(e.referrer_source))
    |> group_by([e], e.referrer_source)
    |> select([e], %{
      source: e.referrer_source,
      visitors: count(e.visitor_hash, :distinct)
    })
    |> order_by([e], desc: count(e.visitor_hash, :distinct))
    |> limit(^limit)
    |> Repo.all()
  end

  @doc """
  Top referrer domains by unique visitors.

  Like `top_sources/2` but grouped by the raw `referrer` domain instead
  of the categorised source name.
  """
  def top_referrers(date_range, limit \\ 10) do
    base_query(date_range)
    |> where([e], e.name == "pageview")
    |> where([e], not is_nil(e.referrer))
    |> group_by([e], e.referrer)
    |> select([e], %{
      referrer: e.referrer,
      visitors: count(e.visitor_hash, :distinct)
    })
    |> order_by([e], desc: count(e.visitor_hash, :distinct))
    |> limit(^limit)
    |> Repo.all()
  end

  @doc """
  Country breakdown by unique visitors.

  Events with no `country_code` (e.g. geo lookup unavailable) are excluded.
  """
  def top_countries(date_range, limit \\ 10) do
    base_query(date_range)
    |> where([e], e.name == "pageview")
    |> where([e], not is_nil(e.country_code))
    |> group_by([e], e.country_code)
    |> select([e], %{
      country_code: e.country_code,
      visitors: count(e.visitor_hash, :distinct)
    })
    |> order_by([e], desc: count(e.visitor_hash, :distinct))
    |> Repo.all()
  end

  @doc """
  Device breakdown by the given dimension (:browser, :os, or :screen_size).

  Returns all groups (no limit), ordered by distinct visitors descending;
  rows where the dimension is nil are excluded.
  """
  def device_breakdown(date_range, dimension) when dimension in [:browser, :os, :screen_size] do
    # The dimension atom doubles as the schema field name, interpolated
    # into the query via field/2.
    field = dimension

    base_query(date_range)
    |> where([e], e.name == "pageview")
    |> where([e], not is_nil(field(e, ^field)))
    |> group_by([e], field(e, ^field))
    |> select([e], %{
      name: field(e, ^field),
      visitors: count(e.visitor_hash, :distinct)
    })
    |> order_by([e], desc: count(e.visitor_hash, :distinct))
    |> Repo.all()
  end

  @doc """
  E-commerce funnel: counts for each step.

  Returns `%{product_views: n, add_to_carts: n, checkouts: n, purchases: n}`.
  Each count is distinct visitors (not raw events), so a visitor viewing
  three products counts once in `product_views`. Steps with no events
  default to 0.
  """
  def funnel(date_range) do
    counts =
      base_query(date_range)
      |> where([e], e.name in ["product_view", "add_to_cart", "checkout_start", "purchase"])
      |> group_by([e], e.name)
      |> select([e], {e.name, count(e.visitor_hash, :distinct)})
      |> Repo.all()
      |> Map.new()

    %{
      product_views: Map.get(counts, "product_view", 0),
      add_to_carts: Map.get(counts, "add_to_cart", 0),
      checkouts: Map.get(counts, "checkout_start", 0),
      purchases: Map.get(counts, "purchase", 0)
    }
  end

  @doc """
  Total revenue in the given date range (pence).

  Sums `revenue` across purchase events; returns 0 (not nil) when there
  are no purchases, via COALESCE.
  """
  def total_revenue(date_range) do
    base_query(date_range)
    |> where([e], e.name == "purchase")
    |> select([e], coalesce(sum(e.revenue), 0))
    |> Repo.one()
  end

  @doc """
  Deletes events older than the given datetime. Used by the retention worker.

  Returns the `Repo.delete_all/1` result: `{deleted_count, nil}`.
  """
  def delete_events_before(datetime) do
    from(e in Event, where: e.inserted_at < ^datetime)
    |> Repo.delete_all()
  end

  # ── Private ──

  # Base event query for a half-open `{start, end}` range: start inclusive,
  # end exclusive, so consecutive ranges never double-count boundary events.
  defp base_query({start_date, end_date}) do
    from(e in Event,
      where: e.inserted_at >= ^start_date and e.inserted_at < ^end_date
    )
  end
end

View File

@@ -0,0 +1,151 @@
defmodule Berrypod.Analytics.Buffer do
  @moduledoc """
  ETS-backed event buffer for analytics.

  Events are written to ETS via `record/1` (fast, no DB round trip) and
  flushed to SQLite every 10 seconds in a single `Repo.insert_all` call.
  This prevents write contention on SQLite from individual event inserts.

  Also tracks active sessions: events from the same visitor within 30 minutes
  get the same `session_hash`. After 30 min of inactivity, a new session starts.

  The process traps exits so `terminate/2` runs a final flush on graceful
  (supervisor-driven) shutdown; otherwise up to a flush-interval's worth of
  buffered events would be dropped on every restart.
  """

  use GenServer

  alias Berrypod.Repo
  alias Berrypod.Analytics.Event

  @flush_interval_ms 10_000
  @session_timeout_ms 30 * 60 * 1_000

  # Columns written by insert_all. Every row in a single insert_all call
  # must have the same keys, so attrs missing a field are filled with nil
  # via @event_defaults.
  @event_fields [
    :id,
    :name,
    :pathname,
    :visitor_hash,
    :session_hash,
    :referrer,
    :referrer_source,
    :utm_source,
    :utm_medium,
    :utm_campaign,
    :country_code,
    :screen_size,
    :browser,
    :os,
    :revenue,
    :inserted_at
  ]
  @event_defaults Map.new(@event_fields, &{&1, nil})

  # ── Public API ──

  def start_link(opts \\ []) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @doc """
  Buffers an analytics event for batch writing.

  Accepts a map of event attributes (must include `:visitor_hash` and `:pathname`).
  Assigns a `session_hash` based on the visitor's active session.
  Returns `:ok` immediately.
  """
  def record(attrs) when is_map(attrs) do
    GenServer.cast(__MODULE__, {:record, attrs})
  end

  # ── GenServer callbacks ──

  @impl true
  def init(_opts) do
    # FIX: trap exits so terminate/2 is actually invoked when the
    # supervisor shuts this process down. Without trapping, a GenServer's
    # terminate/2 is not guaranteed to run and buffered events are lost
    # on every deploy/restart.
    Process.flag(:trap_exit, true)
    table = :ets.new(:analytics_buffer, [:set, :private])
    schedule_flush()
    {:ok, %{table: table, counter: 0, sessions: %{}}}
  end

  @impl true
  def handle_cast({:record, attrs}, state) do
    now = DateTime.utc_now()
    visitor_hash = Map.fetch!(attrs, :visitor_hash)
    {session_hash, sessions} = resolve_session(visitor_hash, now, state.sessions)

    event =
      attrs
      |> Map.put(:session_hash, session_hash)
      |> Map.put(:id, Ecto.UUID.generate())
      |> Map.put(:inserted_at, DateTime.truncate(now, :second))

    # Monotonically increasing counter as the ETS key: preserves insertion
    # order and avoids collisions in the :set table.
    counter = state.counter + 1
    :ets.insert(state.table, {counter, event})
    {:noreply, %{state | counter: counter, sessions: sessions}}
  end

  @impl true
  def handle_info(:flush, state) do
    state = flush_events(state)
    schedule_flush()
    {:noreply, state}
  end

  @impl true
  def terminate(_reason, state) do
    # Final flush so a graceful shutdown never loses buffered events.
    # Relies on trap_exit being set in init/1.
    flush_events(state)
    :ok
  end

  # ── Private ──

  # Returns the session hash for this visitor: reuses the existing one when
  # the last event was under @session_timeout_ms ago, otherwise mints a
  # fresh random 8-byte hash. Always refreshes the visitor's last_at.
  defp resolve_session(visitor_hash, now, sessions) do
    now_ms = DateTime.to_unix(now, :millisecond)

    case Map.get(sessions, visitor_hash) do
      %{hash: hash, last_at: last_at} when now_ms - last_at < @session_timeout_ms ->
        sessions = Map.put(sessions, visitor_hash, %{hash: hash, last_at: now_ms})
        {hash, sessions}

      _ ->
        hash = :crypto.strong_rand_bytes(8)
        sessions = Map.put(sessions, visitor_hash, %{hash: hash, last_at: now_ms})
        {hash, sessions}
    end
  end

  # Drains the ETS table into a single insert_all, then prunes idle
  # sessions so the session map cannot grow without bound.
  defp flush_events(state) do
    events = drain_ets(state.table)

    if events != [] do
      rows =
        Enum.map(events, fn {_key, event} ->
          @event_defaults
          |> Map.merge(Map.take(event, @event_fields))
          |> Enum.into([])
        end)

      Repo.insert_all(Event, rows)
    end

    # Prune expired sessions
    now_ms = DateTime.to_unix(DateTime.utc_now(), :millisecond)

    sessions =
      Map.filter(state.sessions, fn {_hash, %{last_at: last_at}} ->
        now_ms - last_at < @session_timeout_ms
      end)

    %{state | counter: 0, sessions: sessions}
  end

  defp drain_ets(table) do
    events = :ets.tab2list(table)
    :ets.delete_all_objects(table)
    events
  end

  defp schedule_flush do
    Process.send_after(self(), :flush, @flush_interval_ms)
  end
end

View File

@@ -0,0 +1,30 @@
defmodule Berrypod.Analytics.Event do
  @moduledoc """
  Schema for analytics events (pageviews, e-commerce events, etc.).

  Events are immutable — inserted once, never updated (`updated_at` is
  disabled in `timestamps/1` below). Rows are bulk-inserted by
  `Berrypod.Analytics.Buffer`, not via changesets.
  """

  use Ecto.Schema

  # Binary UUID primary key (pre-generated in the buffer before insert_all).
  @primary_key {:id, :binary_id, autogenerate: true}

  schema "analytics_events" do
    # Event type: "pageview", "product_view", "add_to_cart",
    # "checkout_start" or "purchase".
    field :name, :string
    # Request path of the page that generated the event.
    field :pathname, :string
    # Salted, truncated SHA256 of IP + user agent (see
    # Berrypod.Analytics.Salt) — no raw IP or PII is ever stored.
    field :visitor_hash, :binary
    # Random per-session identifier assigned by the buffer (30-min timeout).
    field :session_hash, :binary
    # Raw referrer domain plus its categorised source name ("Google", …)
    # as produced by Berrypod.Analytics.Referrer.
    field :referrer, :string
    field :referrer_source, :string
    # Standard UTM campaign parameters, nil when absent.
    field :utm_source, :string
    field :utm_medium, :string
    field :utm_campaign, :string
    field :country_code, :string
    field :screen_size, :string
    field :browser, :string
    field :os, :string
    # Purchase revenue in pence; nil for non-purchase events.
    field :revenue, :integer

    timestamps(type: :utc_datetime, updated_at: false)
  end
end

View File

@@ -0,0 +1,72 @@
defmodule Berrypod.Analytics.Referrer do
  @moduledoc """
  Referrer cleaning and source categorisation.

  Extracts the domain from a referrer URL and maps known domains to
  human-readable source names (Google, Facebook, etc.). Hosts are
  lowercased before matching, since hostnames are case-insensitive.
  """

  # Known referrer sources, keyed by the first label of the
  # (lowercased, www-stripped) host — "google" matches both google.com
  # and google.co.uk.
  @source_map %{
    "google" => "Google",
    "bing" => "Bing",
    "duckduckgo" => "DuckDuckGo",
    "yahoo" => "Yahoo",
    "baidu" => "Baidu",
    "yandex" => "Yandex",
    "ecosia" => "Ecosia",
    "facebook" => "Facebook",
    "instagram" => "Instagram",
    "twitter" => "Twitter",
    "x" => "Twitter",
    "reddit" => "Reddit",
    "linkedin" => "LinkedIn",
    "pinterest" => "Pinterest",
    "tiktok" => "TikTok",
    "youtube" => "YouTube",
    "t" => "Telegram"
  }

  @doc """
  Parses a referrer URL into `{domain, source}`.

  Returns `{nil, nil}` for empty/invalid referrers (including
  scheme-less strings, where `URI.parse/1` yields no host).

      iex> Berrypod.Analytics.Referrer.parse("https://www.google.com/search?q=test")
      {"google.com", "Google"}

      iex> Berrypod.Analytics.Referrer.parse("https://myblog.example.com/post/1")
      {"myblog.example.com", nil}

      iex> Berrypod.Analytics.Referrer.parse(nil)
      {nil, nil}
  """
  @spec parse(String.t() | nil) :: {String.t() | nil, String.t() | nil}
  def parse(nil), do: {nil, nil}
  def parse(""), do: {nil, nil}

  def parse(referrer) when is_binary(referrer) do
    case URI.parse(referrer) do
      %URI{host: host} when is_binary(host) and host != "" ->
        # FIX: hostnames are case-insensitive (RFC 3986 §3.2.2), so
        # normalise before stripping "www." and matching the source map —
        # previously "WWW.Google.com" produced {"WWW.Google.com", nil}.
        domain = host |> String.downcase() |> strip_www()
        {domain, categorise(domain)}

      _ ->
        {nil, nil}
    end
  end

  defp strip_www("www." <> rest), do: rest
  defp strip_www(host), do: host

  # Looks up the first domain label (e.g. "google" from "google.co.uk")
  # in @source_map; nil for unknown domains.
  defp categorise(domain) do
    base =
      domain
      |> String.split(".")
      |> List.first()

    Map.get(@source_map, base)
  end
end

View File

@@ -0,0 +1,30 @@
defmodule Berrypod.Analytics.RetentionWorker do
  @moduledoc """
  Oban worker that deletes analytics events older than the retention period.

  Runs daily via cron. Default retention is 12 months (approximated as
  12 × 30 days when computing the cutoff timestamp).
  """

  use Oban.Worker, queue: :default, max_attempts: 1

  require Logger

  alias Berrypod.Analytics

  @default_retention_months 12

  @impl Oban.Worker
  def perform(%Oban.Job{}) do
    cutoff = retention_cutoff()
    {deleted, _} = Analytics.delete_events_before(cutoff)

    if deleted > 0 do
      Logger.info("Analytics retention: deleted #{deleted} events older than #{cutoff}")
    end

    :ok
  end

  # Cutoff = now minus the retention period, truncated to whole seconds
  # to match the schema's :utc_datetime timestamps.
  defp retention_cutoff do
    DateTime.utc_now()
    |> DateTime.add(-@default_retention_months * 30, :day)
    |> DateTime.truncate(:second)
  end
end

View File

@@ -0,0 +1,75 @@
defmodule Berrypod.Analytics.Salt do
  @moduledoc """
  Daily-rotating salt for privacy-friendly visitor hashing.

  Generates a random 32-byte salt on startup and rotates it at midnight UTC.
  The visitor_hash is SHA256(salt + IP + UA) truncated to 8 bytes — enough
  for unique visitor counting without being reversible. Because the salt
  changes daily, the same visitor gets a different hash each day.
  """

  use GenServer

  @salt_bytes 32
  @hash_bytes 8

  # ── Public API ──

  def start_link(opts \\ []) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @doc """
  Hashes a visitor's IP and user agent into an 8-byte binary.

  The IP may be given as an `:inet` tuple or a string.
  """
  def hash_visitor(ip, user_agent) when is_tuple(ip) do
    ip |> :inet.ntoa() |> to_string() |> hash_visitor(user_agent)
  end

  def hash_visitor(ip, user_agent) when is_binary(ip) and is_binary(user_agent) do
    salt = GenServer.call(__MODULE__, :get_salt)

    :sha256
    |> :crypto.hash([salt, ip, user_agent])
    |> binary_part(0, @hash_bytes)
  end

  # ── GenServer callbacks ──

  @impl true
  def init(_opts) do
    schedule_rotation()
    {:ok, %{salt: generate_salt()}}
  end

  @impl true
  def handle_call(:get_salt, _from, state), do: {:reply, state.salt, state}

  @impl true
  def handle_info(:rotate, _state) do
    # Replace the salt and line up the next midnight rotation.
    schedule_rotation()
    {:noreply, %{salt: generate_salt()}}
  end

  # ── Private ──

  defp generate_salt, do: :crypto.strong_rand_bytes(@salt_bytes)

  defp schedule_rotation do
    Process.send_after(self(), :rotate, ms_until_next_midnight())
  end

  # Milliseconds from now until 00:00:00 UTC of the next calendar day.
  defp ms_until_next_midnight do
    now = DateTime.utc_now()

    next_midnight =
      now
      |> DateTime.to_date()
      |> Date.add(1)
      |> DateTime.new!(~T[00:00:00], "Etc/UTC")

    DateTime.diff(next_midnight, now, :millisecond)
  end
end

View File

@@ -0,0 +1,75 @@
defmodule Berrypod.Analytics.UAParser do
  @moduledoc """
  Lightweight user agent parsing — extracts browser and OS names.

  No external dependencies. Handles 95%+ of real-world traffic correctly.
  Rules are ordered most-specific first (Edge before Chrome, iOS before
  macOS) because broader tokens also appear in more specific UAs.
  """

  # Crawler markers, checked before any browser token. Both casings are
  # listed to preserve the exact (case-sensitive) matching behaviour.
  @bot_markers ["bot", "Bot", "crawl", "Crawl", "spider", "Spider"]

  # {needles, label} rules; the first rule with any matching needle wins.
  # Edge and Opera UAs also contain "Chrome/", and Chrome UAs contain
  # "Safari/", so order matters.
  @browser_rules [
    {["Firefox/"], "Firefox"},
    {["Edg/"], "Edge"},
    {["OPR/", "Opera"], "Opera"},
    {["Chrome/"], "Chrome"},
    {["Safari/"], "Safari"}
  ]

  # iOS is checked before macOS; Android before Linux (Android UAs
  # typically contain "Linux" as well).
  @os_rules [
    {["iPhone", "iPad", "iPod"], "iOS"},
    {["Android"], "Android"},
    {["Mac OS X", "Macintosh"], "macOS"},
    {["Windows"], "Windows"},
    {["Linux"], "Linux"},
    {["CrOS"], "ChromeOS"}
  ]

  @doc """
  Returns `{browser, os}` tuple from a user agent string.

      iex> Berrypod.Analytics.UAParser.parse("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
      {"Chrome", "macOS"}
  """
  @spec parse(String.t()) :: {String.t(), String.t()}
  def parse(ua) when is_binary(ua), do: {parse_browser(ua), parse_os(ua)}
  def parse(_), do: {"Other", "Other"}

  defp parse_browser(ua) do
    if bot?(ua), do: "Bot", else: first_match(ua, @browser_rules)
  end

  defp parse_os(ua), do: first_match(ua, @os_rules)

  defp bot?(ua), do: Enum.any?(@bot_markers, &String.contains?(ua, &1))

  # Label of the first rule whose needle appears in the UA; "Other" when
  # nothing matches.
  defp first_match(ua, rules) do
    Enum.find_value(rules, "Other", fn {needles, label} ->
      if Enum.any?(needles, &String.contains?(ua, &1)), do: label
    end)
  end
end

View File

@@ -22,6 +22,9 @@ defmodule Berrypod.Application do
{Phoenix.PubSub, name: Berrypod.PubSub},
# Background job processing
{Oban, Application.fetch_env!(:berrypod, Oban)},
# Analytics: daily-rotating salt and ETS event buffer
Berrypod.Analytics.Salt,
Berrypod.Analytics.Buffer,
# Image variant cache - ensures all variants exist on startup
Berrypod.Images.VariantCache,
# Start to serve requests