berrypod/lib/berrypod/analytics.ex

308 lines
8.3 KiB
Elixir
Raw Normal View History

defmodule Berrypod.Analytics do
@moduledoc """
Privacy-first analytics for the storefront.

Inspired by Plausible Analytics: no cookies, no personal data stored,
GDPR-friendly by default. Events are buffered in ETS and flushed to
SQLite in batches.

## Event types

- `pageview` - recorded by the plug (Layer 1) and LiveView hook (Layer 2)
- `product_view` - recorded when a product detail page is viewed
- `add_to_cart` - recorded when an item is added to the cart
- `checkout_start` - recorded when checkout is initiated
- `purchase` - recorded when a purchase is completed (includes revenue)
"""
import Ecto.Query
alias Berrypod.Repo
alias Berrypod.Analytics.{Buffer, Event}
# =====================================================================
# Event recording
# =====================================================================
@doc """
Queues a `pageview` event in the buffer.

`attrs` must be a map; it should carry at least `:pathname` and
`:visitor_hash`. The remaining fields (referrer, UTMs, browser, os,
country_code, screen_size) may be omitted.

The `:name` key is set to `"pageview"` (overwriting any existing value)
before the map is passed to `Buffer.record/1`, whose result is returned.
"""
def track_pageview(attrs) when is_map(attrs) do
  event = Map.put(attrs, :name, "pageview")
  Buffer.record(event)
end
@doc """
Queues a named event in the buffer.

`name` is expected to be one of `product_view`, `add_to_cart`,
`checkout_start` or `purchase`; it must be a binary, and `attrs` must be
a map. The name is stored under the `:name` key (overwriting any existing
value) before the map is passed to `Buffer.record/1`, whose result is
returned.
"""
def track_event(name, attrs) when is_binary(name) and is_map(attrs) do
  event = Map.put(attrs, :name, name)
  Buffer.record(event)
end
# =====================================================================
# Query helpers
# =====================================================================
@doc """
Returns the number of unique visitors within `date_range`.

`date_range` is a `{start, end}` tuple; the start is inclusive and the
end is exclusive. Only `pageview` events are considered, and visitors
are distinguished by `visitor_hash`.
"""
def count_visitors(date_range) do
  query =
    from e in base_query(date_range),
      where: e.name == "pageview",
      select: count(e.visitor_hash, :distinct)

  Repo.one(query)
end
@doc """
Returns the total number of `pageview` events within `date_range`.

`date_range` is a `{start, end}` tuple; the start is inclusive and the
end is exclusive.
"""
def count_pageviews(date_range) do
  query =
    from e in base_query(date_range),
      where: e.name == "pageview",
      select: count()

  Repo.one(query)
end
@doc """
Returns the bounce rate for `date_range` as a rounded percentage (0-100).

A bounce is a session (grouped by `session_hash`) that contains exactly
one `pageview`. When the range holds no sessions the result is `0`.
"""
def bounce_rate(date_range) do
  # Inner query: one row per session with its pageview count.
  per_session =
    from e in base_query(date_range),
      where: e.name == "pageview",
      group_by: e.session_hash,
      select: %{session_hash: e.session_hash, pageviews: count()}

  # Outer query: total sessions and how many of them had a single pageview.
  totals_query =
    from s in subquery(per_session),
      select: %{
        total: count(),
        bounces: sum(fragment("CASE WHEN ? = 1 THEN 1 ELSE 0 END", s.pageviews))
      }

  case Repo.one(totals_query) do
    # No sessions at all: avoid dividing by zero (and by a nil sum).
    %{total: total} when total in [0, nil] -> 0
    %{total: total, bounces: bounces} -> round(bounces / total * 100)
  end
end
@doc """
Returns the average visit duration in whole seconds for `date_range`.

A visit's duration is the gap between the first and last `pageview` of a
session (grouped by `session_hash`). Sessions with a single pageview are
left out of the average. Returns `0` when no session qualifies.
"""
def avg_duration(date_range) do
  # One row per multi-pageview session: last timestamp minus first, in seconds.
  session_durations =
    from e in base_query(date_range),
      where: e.name == "pageview",
      group_by: e.session_hash,
      having: count() > 1,
      select: %{
        duration:
          fragment(
            "CAST(strftime('%s', MAX(?)) AS INTEGER) - CAST(strftime('%s', MIN(?)) AS INTEGER)",
            e.inserted_at,
            e.inserted_at
          )
      }

  # COALESCE keeps the result numeric when there are no rows to average.
  average_query =
    from d in subquery(session_durations),
      select: fragment("COALESCE(AVG(?), 0)", d.duration)

  average_query
  |> Repo.one()
  |> round()
end
@doc """
Daily unique-visitor counts for the trend chart.

Returns a list of maps with `:date` and `:visitors` keys, one per day that
has at least one `pageview` in `date_range`, ordered by date ascending.
"""
def visitors_by_date(date_range) do
  # NOTE(review): `date` is produced by a raw SQLite `date()` fragment with no
  # type cast, so it presumably arrives as an ISO-8601 string rather than a
  # `Date` struct — confirm against the chart code that consumes it.
  query =
    from e in base_query(date_range),
      where: e.name == "pageview",
      group_by: fragment("date(?)", e.inserted_at),
      order_by: fragment("date(?)", e.inserted_at),
      select: %{
        date: fragment("date(?)", e.inserted_at),
        visitors: count(e.visitor_hash, :distinct)
      }

  Repo.all(query)
end
@doc """
Hourly unique-visitor counts for the trend chart (used for the "today" period).

Returns a list of 24 `%{hour: integer, visitors: integer}` maps, one for
each hour from 0 to 23 in order. Hours without any `pageview` are reported
with `visitors: 0`.
"""
def visitors_by_hour(date_range) do
  query =
    from e in base_query(date_range),
      where: e.name == "pageview",
      group_by: fragment("CAST(strftime('%H', ?) AS INTEGER)", e.inserted_at),
      select: %{
        hour: fragment("CAST(strftime('%H', ?) AS INTEGER)", e.inserted_at),
        visitors: count(e.visitor_hash, :distinct)
      }

  # Index the sparse query result by hour for cheap lookups below.
  per_hour =
    for %{hour: hour, visitors: visitors} <- Repo.all(query), into: %{} do
      {hour, visitors}
    end

  # Emit every hour of the day so the chart has no gaps.
  for hour <- 0..23 do
    %{hour: hour, visitors: Map.get(per_hour, hour, 0)}
  end
end
@doc """
Most visited pages in `date_range`, ranked by unique visitors (descending).

Returns at most `limit` maps (default 10), each with `:pathname`,
`:visitors` (distinct `visitor_hash` count) and `:pageviews` (total
`pageview` events for that path).
"""
def top_pages(date_range, limit \\ 10) do
  query =
    from e in base_query(date_range),
      where: e.name == "pageview",
      group_by: e.pathname,
      order_by: [desc: count(e.visitor_hash, :distinct)],
      limit: ^limit,
      select: %{
        pathname: e.pathname,
        visitors: count(e.visitor_hash, :distinct),
        pageviews: count()
      }

  Repo.all(query)
end
@doc """
Top referrer sources in `date_range`, ranked by unique visitors (descending).

Pageviews without a `referrer_source` are ignored. Returns at most `limit`
maps (default 10), each with `:source` and `:visitors` keys.
"""
def top_sources(date_range, limit \\ 10) do
  query =
    from e in base_query(date_range),
      where: e.name == "pageview",
      where: not is_nil(e.referrer_source),
      group_by: e.referrer_source,
      order_by: [desc: count(e.visitor_hash, :distinct)],
      limit: ^limit,
      select: %{
        source: e.referrer_source,
        visitors: count(e.visitor_hash, :distinct)
      }

  Repo.all(query)
end
@doc """
Top referrers in `date_range`, ranked by unique visitors (descending).

Pageviews without a `referrer` are ignored. Returns at most `limit` maps
(default 10), each with `:referrer` and `:visitors` keys.
"""
def top_referrers(date_range, limit \\ 10) do
  query =
    from e in base_query(date_range),
      where: e.name == "pageview",
      where: not is_nil(e.referrer),
      group_by: e.referrer,
      order_by: [desc: count(e.visitor_hash, :distinct)],
      limit: ^limit,
      select: %{
        referrer: e.referrer,
        visitors: count(e.visitor_hash, :distinct)
      }

  Repo.all(query)
end
@doc """
Country breakdown for `date_range`, ranked by unique visitors (descending).

Pageviews without a `country_code` are ignored. Returns at most `limit`
maps (default 10), each with `:country_code` and `:visitors` keys.
"""
def top_countries(date_range, limit \\ 10) do
  query =
    from e in base_query(date_range),
      where: e.name == "pageview",
      where: not is_nil(e.country_code),
      group_by: e.country_code,
      order_by: [desc: count(e.visitor_hash, :distinct)],
      limit: ^limit,
      select: %{
        country_code: e.country_code,
        visitors: count(e.visitor_hash, :distinct)
      }

  Repo.all(query)
end
@doc """
Device breakdown for `date_range` along one dimension.

`dimension` must be `:browser`, `:os` or `:screen_size`; any other value
fails the guard. Pageviews where that column is `nil` are ignored.
Returns a list of `%{name: value, visitors: count}` maps ordered by unique
visitors, descending. No limit is applied.
"""
def device_breakdown(date_range, dimension) when dimension in [:browser, :os, :screen_size] do
  # `dimension` is restricted by the guard, so it is safe to use as a dynamic
  # column name via `field/2`.
  query =
    from e in base_query(date_range),
      where: e.name == "pageview",
      where: not is_nil(field(e, ^dimension)),
      group_by: field(e, ^dimension),
      order_by: [desc: count(e.visitor_hash, :distinct)],
      select: %{
        name: field(e, ^dimension),
        visitors: count(e.visitor_hash, :distinct)
      }

  Repo.all(query)
end
@doc """
E-commerce funnel for `date_range`: unique visitors at each step.

Returns `%{product_views: n, add_to_carts: n, checkouts: n, purchases: n}`.
Each value is the distinct `visitor_hash` count for the matching event
name; steps with no events are reported as `0`.
"""
def funnel(date_range) do
  query =
    from e in base_query(date_range),
      where: e.name in ["product_view", "add_to_cart", "checkout_start", "purchase"],
      group_by: e.name,
      select: {e.name, count(e.visitor_hash, :distinct)}

  per_step = query |> Repo.all() |> Map.new()

  # Event names absent from the result set had no visitors in the range.
  visitors_for = fn event_name -> Map.get(per_step, event_name, 0) end

  %{
    product_views: visitors_for.("product_view"),
    add_to_carts: visitors_for.("add_to_cart"),
    checkouts: visitors_for.("checkout_start"),
    purchases: visitors_for.("purchase")
  }
end
@doc """
Total revenue (in pence) from `purchase` events within `date_range`.

Returns `0` when the range contains no purchases.
"""
def total_revenue(date_range) do
  query =
    from e in base_query(date_range),
      where: e.name == "purchase",
      select: coalesce(sum(e.revenue), 0)

  Repo.one(query)
end
@doc """
Deletes every event inserted strictly before `datetime`.

Used by the retention worker. Returns the result of `Repo.delete_all/1`.
"""
def delete_events_before(datetime) do
  Event
  |> where([e], e.inserted_at < ^datetime)
  |> Repo.delete_all()
end
# ── Private ──
# Builds the shared starting query: all events whose `inserted_at` falls in
# the half-open interval [range_start, range_end).
defp base_query({range_start, range_end}) do
  where(Event, [e], e.inserted_at >= ^range_start and e.inserted_at < ^range_end)
end
end