add privacy-first analytics with progressive event collection
All checks were successful
deploy / deploy (push) Successful in 3m20s

Three-layer pipeline: Plug for all HTTP requests (no JS needed), LiveView
hook for SPA navigations, JS hook for screen width. ETS-backed buffer
batches writes to SQLite every 10s. Daily-rotating salt for visitor hashing.
Includes admin dashboard with date ranges, visitor trends, top pages,
sources, devices, and e-commerce conversion funnel. Oban cron for 12-month
data retention.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
jamey
2026-02-22 12:50:55 +00:00
parent b0aed4c1d6
commit 2bd2e613c7
29 changed files with 2277 additions and 10 deletions

View File

@@ -0,0 +1,87 @@
defmodule Berrypod.Analytics.BufferTest do
  # Round-trip tests for the analytics buffer: events handed to
  # `Buffer.record/1` are expected to appear as `Event` rows, each carrying an
  # 8-byte session hash, once the buffer has handled a `:flush` message.
  use Berrypod.DataCase, async: false

  import Ecto.Query

  alias Berrypod.Analytics.{Buffer, Event}
  alias Berrypod.Repo

  # Flush any pending events from previous tests.
  setup do
    flush_buffer()
    :ok
  end

  # Sends `:flush` to the process registered as `Buffer`, then pauses 50 ms.
  # The send is asynchronous, so the pause is what gives the buffer time to
  # write before the calling test queries the table.
  defp flush_buffer do
    send(Buffer, :flush)
    :timer.sleep(50)
  end

  describe "record/1 and flush" do
    test "buffered events are flushed to the database" do
      hash = :crypto.strong_rand_bytes(8)

      attrs = %{
        name: "pageview",
        pathname: "/buffer-test",
        visitor_hash: hash,
        browser: "Chrome",
        os: "macOS"
      }

      Buffer.record(attrs)
      flush_buffer()

      stored = Repo.all(from(e in Event, where: e.visitor_hash == ^hash))

      # Exactly one row must have been written for this visitor.
      assert [event] = stored

      assert event.name == "pageview"
      assert event.pathname == "/buffer-test"
      assert event.visitor_hash == hash
      assert event.browser == "Chrome"
      assert event.os == "macOS"
      refute is_nil(event.session_hash)
      assert byte_size(event.session_hash) == 8
    end

    test "events within 30 min get the same session_hash" do
      hash = :crypto.strong_rand_bytes(8)

      Enum.each(["/", "/products"], fn path ->
        Buffer.record(%{name: "pageview", pathname: path, visitor_hash: hash})
      end)

      flush_buffer()

      stored = Repo.all(from(e in Event, where: e.visitor_hash == ^hash))
      assert [_, _] = stored

      # Both rows must share a single session hash.
      distinct_sessions = stored |> Enum.map(& &1.session_hash) |> Enum.uniq()
      assert [_only] = distinct_sessions
    end

    test "different visitors get different session_hashes" do
      first = :crypto.strong_rand_bytes(8)
      second = :crypto.strong_rand_bytes(8)

      Enum.each([first, second], fn hash ->
        Buffer.record(%{name: "pageview", pathname: "/", visitor_hash: hash})
      end)

      flush_buffer()

      stored = Repo.all(from(e in Event, where: e.visitor_hash in [^first, ^second]))
      assert [_, _] = stored

      # One distinct session hash per visitor.
      distinct_sessions = stored |> Enum.map(& &1.session_hash) |> Enum.uniq()
      assert [_, _] = distinct_sessions
    end
  end
end

View File

@@ -0,0 +1,74 @@
defmodule Berrypod.Analytics.ReferrerTest do
  # Unit tests for `Referrer.parse/1`. Every assertion expects a
  # `{domain, source}` tuple: the domain has any leading "www." removed, and
  # the source is a display name for recognised sites or `nil` otherwise.
  # Unusable input (nil, empty string, non-URL text) is expected to yield
  # `{nil, nil}`.
  use ExUnit.Case, async: true

  alias Berrypod.Analytics.Referrer

  describe "parse/1" do
    test "extracts Google search" do
      assert Referrer.parse("https://www.google.com/search?q=test") == {"google.com", "Google"}
    end

    test "extracts Google with country TLD" do
      assert Referrer.parse("https://www.google.co.uk/") == {"google.co.uk", "Google"}
    end

    test "extracts Facebook" do
      assert Referrer.parse("https://www.facebook.com/some-page") == {"facebook.com", "Facebook"}
    end

    test "extracts Twitter / X" do
      # t.co is Twitter's link shortener (Telegram's short domain is t.me),
      # so all three hosts must resolve to the same "Twitter" source.
      assert Referrer.parse("https://t.co/abc123") == {"t.co", "Twitter"}
      assert Referrer.parse("https://twitter.com/user") == {"twitter.com", "Twitter"}
      assert Referrer.parse("https://x.com/user") == {"x.com", "Twitter"}
    end

    test "extracts Reddit" do
      assert Referrer.parse("https://www.reddit.com/r/elixir") == {"reddit.com", "Reddit"}
    end

    test "returns nil source for unknown domain" do
      # Only "www." is stripped; other subdomains are kept in the domain.
      assert Referrer.parse("https://myblog.example.com/post/1") ==
               {"myblog.example.com", nil}
    end

    test "strips www prefix" do
      assert Referrer.parse("https://www.example.com/page") == {"example.com", nil}
    end

    test "returns nil for nil input" do
      assert Referrer.parse(nil) == {nil, nil}
    end

    test "returns nil for empty string" do
      assert Referrer.parse("") == {nil, nil}
    end

    test "returns nil for invalid URL" do
      assert Referrer.parse("not a url") == {nil, nil}
    end

    test "extracts DuckDuckGo" do
      assert Referrer.parse("https://duckduckgo.com/?q=test") == {"duckduckgo.com", "DuckDuckGo"}
    end

    test "extracts YouTube" do
      assert Referrer.parse("https://www.youtube.com/watch?v=abc") == {"youtube.com", "YouTube"}
    end

    test "extracts Instagram" do
      assert Referrer.parse("https://www.instagram.com/user") == {"instagram.com", "Instagram"}
    end

    test "extracts LinkedIn" do
      assert Referrer.parse("https://www.linkedin.com/feed") == {"linkedin.com", "LinkedIn"}
    end

    test "extracts Pinterest" do
      assert Referrer.parse("https://www.pinterest.com/pin/123") == {"pinterest.com", "Pinterest"}
    end

    test "extracts Bing" do
      assert Referrer.parse("https://www.bing.com/search?q=test") == {"bing.com", "Bing"}
    end
  end
end

View File

@@ -0,0 +1,40 @@
defmodule Berrypod.Analytics.RetentionWorkerTest do
  # Tests for the retention job: `RetentionWorker.perform/1` must return `:ok`
  # and remove events that are more than 12 months old while leaving recent
  # ones in place.
  use Berrypod.DataCase, async: false

  alias Berrypod.Analytics.{Event, RetentionWorker}
  alias Berrypod.Repo

  # Builds the keyword-list row for a pageview at `pathname` stamped with
  # `inserted_at`; id and both hashes are freshly generated on each call.
  defp pageview_row(pathname, inserted_at) do
    [
      id: Ecto.UUID.generate(),
      name: "pageview",
      pathname: pathname,
      visitor_hash: :crypto.strong_rand_bytes(8),
      session_hash: :crypto.strong_rand_bytes(8),
      inserted_at: inserted_at
    ]
  end

  test "deletes events older than 12 months" do
    # 400 days back is comfortably past the 12-month window named in the test.
    stale_at = DateTime.utc_now() |> DateTime.add(-400, :day) |> DateTime.truncate(:second)
    fresh_at = DateTime.truncate(DateTime.utc_now(), :second)

    rows = [pageview_row("/old", stale_at), pageview_row("/recent", fresh_at)]
    Repo.insert_all(Event, rows)

    assert :ok = RetentionWorker.perform(%Oban.Job{})

    # Only the recent row may survive.
    assert [survivor] = Repo.all(Event)
    assert survivor.pathname == "/recent"
  end

  test "no-op when no old events exist" do
    assert :ok = RetentionWorker.perform(%Oban.Job{})
  end
end

View File

@@ -0,0 +1,37 @@
defmodule Berrypod.Analytics.SaltTest do
  # Tests for `Salt.hash_visitor/2`: the hash is an 8-byte binary, is stable
  # for identical inputs, varies with IP and user agent, and treats an IP
  # given as a string the same as the equivalent tuple.
  use ExUnit.Case, async: true

  alias Berrypod.Analytics.Salt

  # Shared fixtures used by most tests below.
  @loopback {127, 0, 0, 1}
  @agent "Mozilla/5.0"

  describe "hash_visitor/2" do
    test "returns an 8-byte binary" do
      digest = Salt.hash_visitor(@loopback, @agent)

      assert is_binary(digest)
      assert byte_size(digest) == 8
    end

    test "same inputs produce the same hash" do
      first = Salt.hash_visitor(@loopback, @agent)
      second = Salt.hash_visitor(@loopback, @agent)

      assert first == second
    end

    test "different IPs produce different hashes" do
      from_loopback = Salt.hash_visitor(@loopback, @agent)
      from_lan = Salt.hash_visitor({192, 168, 1, 1}, @agent)

      refute from_loopback == from_lan
    end

    test "different user agents produce different hashes" do
      chrome = Salt.hash_visitor(@loopback, "Mozilla/5.0 Chrome")
      firefox = Salt.hash_visitor(@loopback, "Mozilla/5.0 Firefox")

      refute chrome == firefox
    end

    test "accepts IP as a binary string" do
      from_string = Salt.hash_visitor("127.0.0.1", @agent)
      from_tuple = Salt.hash_visitor(@loopback, @agent)

      assert from_string == from_tuple
    end
  end
end

View File

@@ -0,0 +1,81 @@
defmodule Berrypod.Analytics.UAParserTest do
  # Tests for `UAParser.parse/1`, which is expected to turn a User-Agent
  # string into a `{browser, os}` tuple. Crawlers map to `{"Bot", "Other"}`
  # and missing or empty input maps to `{"Other", "Other"}`.
  use ExUnit.Case, async: true

  alias Berrypod.Analytics.UAParser

  describe "parse/1" do
    test "detects Chrome on macOS" do
      assert {"Chrome", "macOS"} =
               UAParser.parse(
                 "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
               )
    end

    test "detects Firefox on Windows" do
      assert {"Firefox", "Windows"} =
               UAParser.parse(
                 "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0"
               )
    end

    test "detects Safari on iOS (iPhone)" do
      # The UA also contains "like Mac OS X", yet the expected OS is iOS.
      assert {"Safari", "iOS"} =
               UAParser.parse(
                 "Mozilla/5.0 (iPhone; CPU iPhone OS 17_2_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1"
               )
    end

    test "detects Chrome on Android" do
      # The UA also contains "Linux", yet the expected OS is Android.
      assert {"Chrome", "Android"} =
               UAParser.parse(
                 "Mozilla/5.0 (Linux; Android 14; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36"
               )
    end

    test "detects Edge on Windows" do
      # The UA also contains "Chrome/" and "Safari/"; the "Edg/" token wins.
      assert {"Edge", "Windows"} =
               UAParser.parse(
                 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0"
               )
    end

    test "detects Chrome on Linux" do
      assert {"Chrome", "Linux"} =
               UAParser.parse(
                 "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
               )
    end

    test "detects bots" do
      assert {"Bot", "Other"} =
               UAParser.parse("Googlebot/2.1 (+http://www.google.com/bot.html)")
    end

    test "detects Bingbot" do
      assert {"Bot", "Other"} =
               UAParser.parse(
                 "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
               )
    end

    test "returns Other/Other for nil" do
      assert {"Other", "Other"} = UAParser.parse(nil)
    end

    test "returns Other/Other for empty string" do
      assert {"Other", "Other"} = UAParser.parse("")
    end

    test "detects Opera" do
      # The UA also contains "Chrome/" and "Safari/"; the "OPR/" token wins.
      assert {"Opera", "Windows"} =
               UAParser.parse(
                 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 OPR/106.0.0.0"
               )
    end

    test "detects Safari on iPad (iOS)" do
      assert {"Safari", "iOS"} =
               UAParser.parse(
                 "Mozilla/5.0 (iPad; CPU OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1"
               )
    end
  end
end

View File

@@ -0,0 +1,250 @@
defmodule Berrypod.AnalyticsTest do
  # Tests for the `Berrypod.Analytics` context. The tracking functions are
  # exercised through the buffer; the query functions are exercised against
  # rows inserted straight into the events table.
  use Berrypod.DataCase, async: false

  import Ecto.Query

  alias Berrypod.Analytics
  alias Berrypod.Analytics.{Buffer, Event}
  alias Berrypod.Repo

  # Flush any pending events, then clear the table so each test starts clean.
  setup do
    flush_buffer()
    Repo.delete_all(Event)
    :ok
  end

  # Sends `:flush` to the process registered as `Buffer` and pauses 50 ms so
  # the asynchronous flush has time to write before the test continues.
  defp flush_buffer do
    send(Buffer, :flush)
    :timer.sleep(50)
  end

  # Fresh random 8-byte binary, used for visitor and session hashes.
  defp random_hash, do: :crypto.strong_rand_bytes(8)

  # Inserts one event row directly (bypassing the buffer, for query tests).
  # `attrs` overrides the defaults; the merged map is returned.
  defp insert_event(attrs) do
    row =
      Map.merge(
        %{
          id: Ecto.UUID.generate(),
          name: "pageview",
          pathname: "/",
          visitor_hash: random_hash(),
          session_hash: random_hash(),
          inserted_at: DateTime.truncate(DateTime.utc_now(), :second)
        },
        attrs
      )

    Repo.insert_all(Event, [Map.to_list(row)])
    row
  end

  # `{start, end}` pair: midnight UTC today and midnight UTC tomorrow.
  defp today_range do
    midnight = fn date -> DateTime.new!(date, ~T[00:00:00], "Etc/UTC") end
    today = Date.utc_today()

    {midnight.(today), midnight.(Date.add(today, 1))}
  end

  describe "track_pageview/1" do
    test "records a pageview through the buffer" do
      hash = random_hash()

      Analytics.track_pageview(%{pathname: "/test", visitor_hash: hash})
      flush_buffer()

      [event] = Repo.all(from(e in Event, where: e.visitor_hash == ^hash))

      assert %{name: "pageview", pathname: "/test"} = event
    end
  end

  describe "track_event/2" do
    test "records a named event through the buffer" do
      hash = random_hash()
      attrs = %{pathname: "/products/tee", visitor_hash: hash}

      Analytics.track_event("add_to_cart", attrs)
      flush_buffer()

      [event] = Repo.all(from(e in Event, where: e.visitor_hash == ^hash))

      assert %{name: "add_to_cart", pathname: "/products/tee"} = event
    end
  end

  describe "count_visitors/1" do
    test "counts distinct visitors" do
      alice = random_hash()
      bob = random_hash()

      # Three events, but only two distinct visitor hashes.
      for {visitor, path} <- [{alice, "/"}, {alice, "/about"}, {bob, "/"}] do
        insert_event(%{visitor_hash: visitor, pathname: path})
      end

      assert Analytics.count_visitors(today_range()) == 2
    end

    test "returns 0 for empty range" do
      assert Analytics.count_visitors(today_range()) == 0
    end
  end

  describe "count_pageviews/1" do
    test "counts all pageview events" do
      visitor = random_hash()

      # The "product_view" event must not be counted as a pageview.
      Enum.each(
        [
          %{visitor_hash: visitor, pathname: "/"},
          %{visitor_hash: visitor, pathname: "/about"},
          %{visitor_hash: visitor, pathname: "/products/tee", name: "product_view"}
        ],
        &insert_event/1
      )

      assert Analytics.count_pageviews(today_range()) == 2
    end
  end

  describe "bounce_rate/1" do
    test "100% bounce rate when all sessions have 1 pageview" do
      for {session, path} <- [{random_hash(), "/"}, {random_hash(), "/about"}] do
        insert_event(%{session_hash: session, pathname: path})
      end

      assert Analytics.bounce_rate(today_range()) == 100
    end

    test "0% bounce rate when all sessions have multiple pageviews" do
      shared_session = random_hash()

      for path <- ["/", "/about"] do
        insert_event(%{session_hash: shared_session, pathname: path})
      end

      assert Analytics.bounce_rate(today_range()) == 0
    end

    test "returns 0 for no data" do
      assert Analytics.bounce_rate(today_range()) == 0
    end
  end

  describe "top_pages/2" do
    test "returns pages sorted by visitor count" do
      alice = random_hash()
      bob = random_hash()

      for {visitor, path} <- [{alice, "/"}, {bob, "/"}, {alice, "/about"}] do
        insert_event(%{visitor_hash: visitor, pathname: path})
      end

      [leader | _] = Analytics.top_pages(today_range())

      assert leader.pathname == "/"
      assert leader.visitors == 2
    end
  end

  describe "top_sources/2" do
    test "returns sources sorted by visitor count" do
      alice = random_hash()
      bob = random_hash()

      for {visitor, source} <- [{alice, "Google"}, {bob, "Google"}, {alice, "Facebook"}] do
        insert_event(%{visitor_hash: visitor, referrer_source: source})
      end

      [leader | _] = Analytics.top_sources(today_range())

      assert leader.source == "Google"
      assert leader.visitors == 2
    end
  end

  describe "top_countries/2" do
    test "returns countries sorted by visitor count" do
      alice = random_hash()
      bob = random_hash()

      for {visitor, code} <- [{alice, "GB"}, {bob, "GB"}, {alice, "US"}] do
        insert_event(%{visitor_hash: visitor, country_code: code})
      end

      [leader | _] = Analytics.top_countries(today_range())

      assert leader.country_code == "GB"
      assert leader.visitors == 2
    end
  end

  describe "device_breakdown/2" do
    test "returns browser breakdown" do
      alice = random_hash()
      bob = random_hash()

      for {visitor, browser} <- [{alice, "Chrome"}, {bob, "Chrome"}, {alice, "Firefox"}] do
        insert_event(%{visitor_hash: visitor, browser: browser})
      end

      [leader | _] = Analytics.device_breakdown(today_range(), :browser)

      assert leader.name == "Chrome"
      assert leader.visitors == 2
    end
  end

  describe "funnel/1" do
    test "returns counts for each funnel step" do
      buyer = random_hash()
      browser_only = random_hash()

      # `buyer` walks the whole funnel; `browser_only` stops at the product view.
      Enum.each(
        [
          %{visitor_hash: buyer, name: "product_view", pathname: "/products/tee"},
          %{visitor_hash: browser_only, name: "product_view", pathname: "/products/tee"},
          %{visitor_hash: buyer, name: "add_to_cart", pathname: "/products/tee"},
          %{visitor_hash: buyer, name: "checkout_start", pathname: "/checkout"},
          %{
            visitor_hash: buyer,
            name: "purchase",
            pathname: "/checkout/success",
            revenue: 2500
          }
        ],
        &insert_event/1
      )

      steps = Analytics.funnel(today_range())

      assert steps.product_views == 2
      assert steps.add_to_carts == 1
      assert steps.checkouts == 1
      assert steps.purchases == 1
    end
  end

  describe "total_revenue/1" do
    test "sums revenue from purchase events" do
      buyer = random_hash()

      for amount <- [2500, 1500] do
        insert_event(%{visitor_hash: buyer, name: "purchase", pathname: "/", revenue: amount})
      end

      assert Analytics.total_revenue(today_range()) == 4000
    end

    test "returns 0 when no purchases" do
      assert Analytics.total_revenue(today_range()) == 0
    end
  end

  describe "delete_events_before/1" do
    test "deletes old events" do
      stale_at = DateTime.utc_now() |> DateTime.add(-400, :day) |> DateTime.truncate(:second)
      fresh_at = DateTime.truncate(DateTime.utc_now(), :second)

      insert_event(%{inserted_at: stale_at, pathname: "/old"})
      insert_event(%{inserted_at: fresh_at, pathname: "/recent"})

      # The cutoff sits between the two rows: 365 days back.
      cutoff = DateTime.utc_now() |> DateTime.add(-365, :day) |> DateTime.truncate(:second)

      assert {1, _} = Analytics.delete_events_before(cutoff)

      assert [survivor] = Repo.all(Event)
      assert survivor.pathname == "/recent"
    end
  end
end