codoki-ai · zaibkhan · Jun 26, 2025 · Jul 25, 2025 · Sep 3, 2025 · codoki-pr-intelligence
diff --git a/pyproject.toml b/pyproject.toml
@@ -173,6 +173,7 @@ module = [
     "sentry.api.event_search",
     "sentry.api.helpers.deprecation",
     "sentry.api.helpers.environments",
+    "sentry.api.helpers.error_upsampling",
     "sentry.api.helpers.group_index.delete",
     "sentry.api.helpers.group_index.update",
     "sentry.api.helpers.source_map_helper",
@@ -460,6 +461,7 @@ module = [
     "tests.sentry.api.endpoints.issues.test_organization_derive_code_mappings",
     "tests.sentry.api.endpoints.test_browser_reporting_collector",
     "tests.sentry.api.endpoints.test_project_repo_path_parsing",
+    "tests.sentry.api.helpers.test_error_upsampling",
     "tests.sentry.audit_log.services.*",
     "tests.sentry.deletions.test_group",
     "tests.sentry.event_manager.test_event_manager",

diff --git a/sentry-repo b/sentry-repo
diff --git a/src/sentry/api/endpoints/organization_events_stats.py b/src/sentry/api/endpoints/organization_events_stats.py
@@ -11,6 +11,10 @@
 from sentry.api.api_publish_status import ApiPublishStatus
 from sentry.api.base import region_silo_endpoint
 from sentry.api.bases import OrganizationEventsV2EndpointBase
+from sentry.api.helpers.error_upsampling import (
+    is_errors_query_for_error_upsampled_projects,
+    transform_query_columns_for_error_upsampling,
+)
 from sentry.constants import MAX_TOP_EVENTS
 from sentry.models.dashboard_widget import DashboardWidget, DashboardWidgetTypes
 from sentry.models.organization import Organization
@@ -117,7 +121,7 @@ def get(self, request: Request, organization: Organization) -> Response:
                         status=400,
                     )
                 elif top_events <= 0:
-                    return Response({"detail": "If topEvents needs to be at least 1"}, status=400)
+                    return Response({"detail": "topEvents needs to be at least 1"}, status=400)
 
             comparison_delta = None
             if "comparisonDelta" in request.GET:
@@ -211,12 +215,28 @@ def _get_event_stats(
             zerofill_results: bool,
             comparison_delta: timedelta | None,
         ) -> SnubaTSResult | dict[str, SnubaTSResult]:
+            # Decide once, up front, whether this query is eligible for error upsampling
+            # so that every query path below acts on the same decision.
+            should_upsample = is_errors_query_for_error_upsampled_projects(
+                snuba_params, organization, dataset, request
+            )
+
+            # Start from the untransformed columns; each query path below swaps in the
+            # upsampled columns only when upsampling is enabled.
+            upsampling_enabled = should_upsample
+            final_columns = query_columns
+
             if top_events > 0:
+                # Top-events path: use the upsampled columns when eligible. This covers
+                # both the RPC and the non-RPC top-events queries below.
+                if upsampling_enabled:
+                    final_columns = transform_query_columns_for_error_upsampling(query_columns)
+
                 if use_rpc:
                     return scoped_dataset.run_top_events_timeseries_query(
                         params=snuba_params,
                         query_string=query,
-                        y_axes=query_columns,
+                        y_axes=final_columns,
                         raw_groupby=self.get_field_list(organization, request),
                         orderby=self.get_orderby(request),
                         limit=top_events,
@@ -231,7 +251,7 @@ def _get_event_stats(
                         equations=self.get_equation_list(organization, request),
                     )
                 return scoped_dataset.top_events_timeseries(
-                    timeseries_columns=query_columns,
+                    timeseries_columns=final_columns,
                     selected_columns=self.get_field_list(organization, request),
                     equations=self.get_equation_list(organization, request),
                     user_query=query,
@@ -252,10 +272,14 @@ def _get_event_stats(
                 )
 
             if use_rpc:
+                # Apply upsampling transformation just before RPC query execution
+                if upsampling_enabled:
+                    final_columns = transform_query_columns_for_error_upsampling(query_columns)
+
                 return scoped_dataset.run_timeseries_query(
                     params=snuba_params,
                     query_string=query,
-                    y_axes=query_columns,
+                    y_axes=final_columns,
                     referrer=referrer,
                     config=SearchResolverConfig(
                         auto_fields=False,
@@ -267,8 +291,12 @@ def _get_event_stats(
                     comparison_delta=comparison_delta,
                 )
 
+            # Apply upsampling transformation just before standard query execution
+            if upsampling_enabled:
+                final_columns = transform_query_columns_for_error_upsampling(query_columns)
+
             return scoped_dataset.timeseries_query(
-                selected_columns=query_columns,
+                selected_columns=final_columns,
                 query=query,
                 snuba_params=snuba_params,
                 rollup=rollup,

diff --git a/src/sentry/api/helpers/error_upsampling.py b/src/sentry/api/helpers/error_upsampling.py
@@ -0,0 +1,140 @@
+from collections.abc import Sequence
+from types import ModuleType
+from typing import Any
+
+from rest_framework.request import Request
+
+from sentry import options
+from sentry.models.organization import Organization
+from sentry.search.events.types import SnubaParams
+from sentry.utils.cache import cache
+
+
+def is_errors_query_for_error_upsampled_projects(
+    snuba_params: SnubaParams,
+    organization: Organization,
+    dataset: ModuleType,
+    request: Request,
+) -> bool:
+    """
+    Return True when this query should use error upsampling transformations.
+
+    Both conditions must hold: every project in ``snuba_params.project_ids`` is
+    allowlisted, and ``dataset``/``request`` describe an errors query. Only the
+    allowlist result is cached (60 seconds, keyed by organization id and the
+    sorted project ids); the dataset/request check runs on every call, so an
+    allowlist change can take up to 60 seconds to show up here.
+    """
+    # Spell the ids out instead of hashing them so distinct project sets can
+    # never share a key. This format must match invalidate_upsampling_cache().
+    # NOTE(review): key length grows with the project count - confirm backend limits.
+    project_key = ",".join(map(str, sorted(snuba_params.project_ids)))
+    cache_key = f"error_upsampling_eligible:{organization.id}:{project_key}"
+
+    is_eligible = cache.get(cache_key)
+    if is_eligible is None:
+        # Cache miss: read the allowlist and remember the answer for 60 seconds.
+        is_eligible = _are_all_projects_error_upsampled(snuba_params.project_ids, organization)
+        cache.set(cache_key, is_eligible, 60)
+
+    return bool(is_eligible) and _should_apply_sample_weight_transform(dataset, request)
+
+
+def _are_all_projects_error_upsampled(
+    project_ids: Sequence[int], organization: Organization
+) -> bool:
+    """
+    Check if ALL projects in the query are allowlisted for error upsampling.
+
+    Returns False when ``project_ids`` is empty or the allowlist option is
+    empty. ``organization`` is currently unused. The option is read on every
+    call, so results can differ between calls if the configuration changes.
+    """
+    if not project_ids:
+        return False
+
+    # NOTE(review): the former `[]` second argument was dropped; it looks like
+    # options.get() takes `silent` there, not a default - confirm. An unset or
+    # empty value is handled by the guard below either way.
+    allowlist = options.get("issues.client_error_sampling.project_allowlist")
+    if not allowlist:
+        return False
+
+    # One set build gives O(1) membership checks instead of repeated list scans.
+    return set(allowlist).issuperset(project_ids)
+
+
+def invalidate_upsampling_cache(organization_id: int, project_ids: Sequence[int]) -> None:
+    """
+    Invalidate the cached upsampling eligibility for one organization/project set.
+    Call this when the allowlist changes. The key mirrors the one built in
+    is_errors_query_for_error_upsampled_projects(): only that exact id set is cleared.
+    """
+    cache_key = f"error_upsampling_eligible:{organization_id}:{','.join(map(str, sorted(project_ids)))}"
+    cache.delete(cache_key)
+
+
+def transform_query_columns_for_error_upsampling(
+    query_columns: Sequence[str],
+) -> list[str]:
+    """
+    Rewrite ``count()`` columns so they are computed by ``upsampled_count()``.
+
+    A column is rewritten when it equals ``count()`` after trimming surrounding
+    whitespace and ignoring case; it becomes ``upsampled_count() as count`` so
+    the result keeps the ``count`` alias. Every other column is returned
+    unchanged and in its original position.
+
+    No project or schema validation happens here: callers are expected to have
+    checked upsampling eligibility first.
+
+    Returns a new list; ``query_columns`` itself is not modified.
+    """
+
+    def _rewrite(column: str) -> str:
+        # Normalize only for the comparison; pass-through columns keep their text.
+        if column.lower().strip() == "count()":
+            return "upsampled_count() as count"
+        return column
+
+    return [_rewrite(column) for column in query_columns]
+
+
+def _should_apply_sample_weight_transform(dataset: Any, request: Request) -> bool:
+    """
+    Decide whether sample_weight transformations apply to this dataset/query.
+
+    - errors dataset: always True
+    - transactions dataset: always False
+    - discover dataset: whatever ``_is_error_focused_query(request)`` returns
+    - any other dataset: False
+
+    """
+    from sentry.snuba import discover, errors
+
+    # Errors dataset: always eligible.
+    if dataset == errors:
+        return True
+
+    from sentry.snuba import transactions
+
+    # Transactions dataset: never eligible.
+    if dataset == transactions:
+        return False
+
+    # Only discover is left as a candidate, and there the decision is delegated
+    # to the request's query. Every remaining dataset is rejected.
+    return _is_error_focused_query(request) if dataset == discover else False
+
+
+def _is_error_focused_query(request: Request) -> bool:
+    """
+    Check if a query is focused on error events.
+
+    Conservative on purpose: returns True only when a whitespace-separated term
+    of the ``query`` parameter is exactly ``event.type:error`` (case-insensitive,
+    surrounding parentheses ignored). Negated (``!event.type:error``) or longer
+    (``event.type:error_x``) terms do not match, unlike a plain substring test.
+    """
+    terms = request.GET.get("query", "").lower().split()
+    return any(term.strip("()") == "event.type:error" for term in terms)
diff --git a/src/sentry/search/events/datasets/discover.py b/src/sentry/search/events/datasets/discover.py
@@ -1038,6 +1038,18 @@ def function_converter(self) -> Mapping[str, SnQLFunction]:
                     default_result_type="integer",
                     private=True,
                 ),
+                SnQLFunction(
+                    "upsampled_count",
+                    required_args=[],
+                    # Optimized aggregation for error upsampling - assumes sample_weight
+                    # exists for all events in allowlisted projects as per schema design
+                    snql_aggregate=lambda args, alias: Function(
+                        "toInt64",
+                        [Function("sum", [Column("sample_weight")])],
+                        alias,
+                    ),
+                    default_result_type="number",
+                ),
             ]
         }
 

diff --git a/src/sentry/testutils/factories.py b/src/sentry/testutils/factories.py
@@ -8,7 +8,7 @@
 import zipfile
 from base64 import b64encode
 from binascii import hexlify
-from collections.abc import Mapping, Sequence
+from collections.abc import Mapping, MutableMapping, Sequence
 from datetime import UTC, datetime
 from enum import Enum
 from hashlib import sha1
@@ -341,6 +341,22 @@ def _patch_artifact_manifest(path, org=None, release=None, project=None, extra_f
     return orjson.dumps(manifest).decode()
 
 
+def _set_sample_rate_from_error_sampling(normalized_data: MutableMapping[str, Any]) -> None:
+    """Set 'sample_rate' on normalized_data if contexts.error_sampling.client_sample_rate is present and valid.
+
+    Best-effort: a missing, falsy (e.g. 0), malformed or non-numeric value leaves normalized_data unchanged.
+    """
+    try:
+        contexts = normalized_data.get("contexts") or {}
+        client_sample_rate = (contexts.get("error_sampling") or {}).get("client_sample_rate")
+        if client_sample_rate:
+            normalized_data["sample_rate"] = float(client_sample_rate)
+    except (AttributeError, TypeError, ValueError, OverflowError):
+        # AttributeError: a context level is not a mapping. The others: float()
+        # could not convert the value. Anything else is a real bug and propagates.
+        pass
+
+
 # TODO(dcramer): consider moving to something more scalable like factoryboy
 class Factories:
     @staticmethod
@@ -1029,6 +1045,9 @@ def store_event(
             assert not errors, errors
 
         normalized_data = manager.get_data()
+
+        _set_sample_rate_from_error_sampling(normalized_data)
+
         event = None
 
         # When fingerprint is present on transaction, inject performance problems