diff --git a/extra/lib/plausible/stats/funnel.ex b/extra/lib/plausible/stats/funnel.ex index a9fae4ca90d6..5614c640b6b0 100644 --- a/extra/lib/plausible/stats/funnel.ex +++ b/extra/lib/plausible/stats/funnel.ex @@ -13,7 +13,7 @@ defmodule Plausible.Stats.Funnel do import Plausible.Stats.SQL.Fragments alias Plausible.ClickhouseRepo - alias Plausible.Stats.Base + alias Plausible.Stats.{Base, Query} @spec funnel(Plausible.Site.t(), Plausible.Stats.Query.t(), Funnel.t() | pos_integer()) :: {:ok, map()} | {:error, :funnel_not_found} @@ -28,8 +28,11 @@ defmodule Plausible.Stats.Funnel do end def funnel(_site, query, %Funnel{} = funnel) do + goals = Enum.map(funnel.steps, & &1.goal) + funnel_data = query + |> Query.set(preloaded_goals: %{all: [], matching_toplevel_filters: goals}) |> Base.base_event_query() |> query_funnel(funnel) diff --git a/lib/plausible/stats/base.ex b/lib/plausible/stats/base.ex index efed82b903bd..68c9391846da 100644 --- a/lib/plausible/stats/base.ex +++ b/lib/plausible/stats/base.ex @@ -27,7 +27,11 @@ defmodule Plausible.Stats.Base do end defp query_events(query) do - q = from(e in "events_v2", where: ^SQL.WhereBuilder.build(:events, query)) + q = + from(e in "events_v2", + where: ^SQL.WhereBuilder.build(:events, query), + where: ^SQL.WhereBuilder.derived_name_filter(query) + ) on_ee do q = Plausible.Stats.Sampling.add_query_hint(q, query) diff --git a/lib/plausible/stats/sql/query_builder.ex b/lib/plausible/stats/sql/query_builder.ex index 777056bf37c2..91caed4c9655 100644 --- a/lib/plausible/stats/sql/query_builder.ex +++ b/lib/plausible/stats/sql/query_builder.ex @@ -8,7 +8,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do import Plausible.Stats.Imported import Plausible.Stats.Util - alias Plausible.Stats.{Query, Filters, QueryOptimizer, TableDecider, SQL} + alias Plausible.Stats.{Query, QueryOptimizer, TableDecider, SQL} alias Plausible.Stats.SQL.Expression alias Plausible.Stats.Legacy.TimeOnPage @@ -36,7 +36,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do from( e in "events_v2", where: ^SQL.WhereBuilder.build(:events, events_query), - where: ^derived_name_filter(events_query), + where: ^SQL.WhereBuilder.derived_name_filter(events_query), select: ^select_event_metrics(events_query) ) @@ -111,6 +111,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do events_q = from(e in "events_v2", where: ^SQL.WhereBuilder.build(:events, query), + where: ^SQL.WhereBuilder.derived_name_filter(query), select: %{ session_id: fragment("DISTINCT ?", e.session_id), _sample_factor: fragment("_sample_factor") @@ -130,40 +131,6 @@ defmodule Plausible.Stats.SQL.QueryBuilder do end end - defp derived_name_filter(events_query) do - cond do - goal_breakdown?(events_query) -> - names = goal_event_names(events_query.preloaded_goals.all) - dynamic([e], e.name in ^names) - - # Goal filters already add precise name conditions via Goals.add_filter - Filters.filtering_on_dimension?(events_query.filters, "event:goal") -> - true - - :time_on_page not in events_query.metrics and :scroll_depth not in events_query.metrics -> - dynamic([e], e.name != "engagement") - - true -> - true - end - end - - defp goal_breakdown?(query) do - "event:goal" in query.dimensions - end - - defp goal_event_names(goals) do - goals - |> Enum.map(fn goal -> - case Plausible.Goal.type(goal) do - :event -> goal.event_name - :page -> "pageview" - :scroll -> "engagement" - end - end) - |> Enum.uniq() - end - defp select_event_metrics(query) do query.metrics |> Enum.map(&SQL.Expression.event_metric(&1, query)) diff --git a/lib/plausible/stats/sql/where_builder.ex b/lib/plausible/stats/sql/where_builder.ex index 18a15546c0b1..18353d34cba0 100644 --- a/lib/plausible/stats/sql/where_builder.ex +++ b/lib/plausible/stats/sql/where_builder.ex @@ -18,6 +18,43 @@ defmodule Plausible.Stats.SQL.WhereBuilder do :exit_page_hostname ] + @doc """ + Builds a WHERE condition to restrict event name, reducing unnecessary reads of + engagement events when they are not needed. + + - Goal filters: no restriction (Goals.add_filter already adds precise name conditions) + - Preloaded goals present: restrict to only the event names relevant to those goals + - Default: exclude engagement events unless time_on_page or scroll_depth metrics are requested + """ + def derived_name_filter(query) do + cond do + Plausible.Stats.Filters.filtering_on_dimension?(query.filters, "event:goal") -> + true + + query.preloaded_goals.matching_toplevel_filters != [] -> + names = goal_event_names(query.preloaded_goals.matching_toplevel_filters) + dynamic([e], e.name in ^names) + + :time_on_page not in query.metrics and :scroll_depth not in query.metrics -> + dynamic([e], e.name != "engagement") + + true -> + true + end + end + + defp goal_event_names(goals) do + goals + |> Enum.map(fn goal -> + case Plausible.Goal.type(goal) do + :event -> goal.event_name + :page -> "pageview" + :scroll -> "engagement" + end + end) + |> Enum.uniq() + end + @doc "Builds WHERE clause for a given Query against sessions or events table" def build(table, query) do base_condition = filter_site_time_range(table, query)