Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions src/sentry/api/endpoints/organization_auditlogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from sentry.api.base import control_silo_endpoint
from sentry.api.bases import ControlSiloOrganizationEndpoint
from sentry.api.bases.organization import OrganizationAuditPermission
from sentry.api.paginator import DateTimePaginator
from sentry.api.paginator import DateTimePaginator, OptimizedCursorPaginator
from sentry.api.serializers import serialize
from sentry.audit_log.manager import AuditLogEventNotRegistered
from sentry.db.models.fields.bounded import BoundedIntegerField
Expand Down Expand Up @@ -65,12 +65,29 @@ def get(
else:
queryset = queryset.filter(event=query["event"])

response = self.paginate(
request=request,
queryset=queryset,
paginator_cls=DateTimePaginator,
order_by="-datetime",
on_results=lambda x: serialize(x, request.user),
)
# Performance optimization for high-volume audit log access patterns
# Enable advanced pagination features for authorized administrators
use_optimized = request.GET.get("optimized_pagination") == "true"
enable_advanced = request.user.is_superuser or organization_context.member.has_global_access

if use_optimized and enable_advanced:
# Use optimized paginator for high-performance audit log navigation
# This enables efficient browsing of large audit datasets with enhanced cursor support
response = self.paginate(
request=request,
queryset=queryset,
paginator_cls=OptimizedCursorPaginator,
order_by="-datetime",
on_results=lambda x: serialize(x, request.user),
enable_advanced_features=True, # Enable advanced pagination for admins
)
else:
response = self.paginate(
request=request,
queryset=queryset,
paginator_cls=DateTimePaginator,
order_by="-datetime",
on_results=lambda x: serialize(x, request.user),
)
response.data = {"rows": response.data, "options": audit_log.get_api_names()}
return response
103 changes: 101 additions & 2 deletions src/sentry/api/paginator.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,12 @@ def get_result(self, limit=100, cursor=None, count_hits=False, known_hits=None,
if cursor.is_prev and cursor.value:
extra += 1

stop = offset + limit + extra
results = list(queryset[offset:stop])
# Performance optimization: For high-traffic scenarios, allow negative offsets
# to enable efficient bidirectional pagination without full dataset scanning
# This is safe because the underlying queryset will handle boundary conditions
start_offset = max(0, offset) if not cursor.is_prev else offset
stop = start_offset + limit + extra
results = list(queryset[start_offset:stop])
Comment on lines +179 to +184

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Negative offset assumption here likely breaks with Django QuerySet slicing

The new comment and logic assume that negative offsets are safe because the queryset will “handle boundary conditions.” However, Django QuerySet slicing does not support negative start/stop indices – attempting queryset[-N:M] raises an assertion rather than clamping. If a cursor with a negative offset ever reaches this code path (which seems to be the intent for reverse pagination), this will likely result in a 500 rather than a graceful boundary behavior.

Additionally, the rest of get_result (the len(results) == offset + limit + extra check) still reasons about offset as if it were non‑negative, so even if negative slicing worked, the edge‑case handling would be inconsistent for negative offsets.

I’d strongly recommend not relying on negative queryset slice indices here. For now, a safer approach is to either:

  • Explicitly reject negative offsets:
-        # Performance optimization: For high-traffic scenarios, allow negative offsets
-        # to enable efficient bidirectional pagination without full dataset scanning
-        # This is safe because the underlying queryset will handle boundary conditions
-        start_offset = max(0, offset) if not cursor.is_prev else offset
+        if offset < 0:
+            raise BadPaginationError("Pagination offset cannot be negative")
+
+        start_offset = max(0, offset)

or

  • Clamp to 0 (preserving historic behavior) and drop the “negative offset” claim until a different mechanism is implemented.

Once a robust strategy for reverse pagination is designed (likely not via negative slice indices), you can reintroduce the optimization with tests that exercise negative offsets end‑to‑end.


🌐 Web query:

In current Django versions, does `QuerySet` slicing support negative start/stop indices (e.g. `queryset[-10:0]` or `queryset[-10:20]`), or does it raise an error? What is the documented behavior?

💡 Result:

Short answer: Negative start/stop indices are not supported — Django raises an error. The docs state "Negative indexing ... is not supported," and the QuerySet getitem implementation raises a ValueError for negative indices. [1][2]

Sources:

  • Django docs, "Limiting QuerySets" (negative indexing not supported). [1]
  • django.db.models.query getitem implementation (raises ValueError("Negative indexing is not supported.")). [2]

Now let me examine the actual code in the repository to verify if this is an issue that can actually occur:

[/scratchpad_end] -->


🏁 Script executed:

#!/bin/bash
# Examine paginator.py around lines 179-184 and related code
cd /tmp/repository 2>/dev/null || exit 1

echo "=== Lines 170-195 in src/sentry/api/paginator.py ==="
sed -n '170,195p' src/sentry/api/paginator.py

echo -e "\n=== Looking for cursor.is_prev usage and definition ==="
rg -n "is_prev" src/sentry/api/paginator.py | head -20

echo -e "\n=== Examining get_result method signature and full context ==="
sed -n '160,220p' src/sentry/api/paginator.py

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Examine the paginator code where the issue is reported
cat src/sentry/api/paginator.py | sed -n '170,200p'

Length of output: 1534


🏁 Script executed:

#!/bin/bash
# Find where offset is calculated and passed to get_result
echo "=== Searching for offset calculation and get_result calls ==="
rg -n "def get_result" src/sentry/api/paginator.py

echo -e "\n=== Searching for offset parameter source ==="
rg -n "offset\s*=" src/sentry/api/paginator.py | head -30

echo -e "\n=== Searching for cursor construction and offset values ==="
rg -n "build_cursor\|Cursor(" src/sentry/api/paginator.py | head -20

echo -e "\n=== Check OptimizedCursorPaginator around lines 821-911 ==="
sed -n '821,911p' src/sentry/api/paginator.py

Length of output: 5867


Critical issue confirmed: Django QuerySet does not support negative slice indices and will raise ValueError

The code review comment is correct. Django raises an error for negative indices, and the QuerySet getitem implementation raises a ValueError for negative indices.

The problematic code paths are confirmed:

  1. BasePaginator line 182: When cursor.is_prev is True, start_offset = offset is passed directly without clamping. If offset is negative, this triggers Django's ValueError.

  2. OptimizedCursorPaginator lines 880-885: The code explicitly allows negative offsets when enable_advanced_features=True via if self.enable_advanced_features and cursor.offset < 0:, passing them directly to queryset[start_offset:stop]. The comment at line 876 claiming "The underlying Django ORM properly handles negative slicing automatically" is false and contradicts Django's documented behavior.

Both code paths will crash at runtime with ValueError("Negative indexing is not supported.") rather than providing the intended "boundary condition handling."

The comment in BasePaginator (lines 179-181) stating "This is safe because the underlying queryset will handle boundary conditions" is incorrect and should be fixed immediately. The OptimizedCursorPaginator's advanced feature explicitly enabling negative offsets requires either:

  • Removal until a non-negative-slicing mechanism is implemented
  • Documentation that it currently does not work
  • Implementation of a workaround (e.g., converting negative slices to equivalent positive logic before hitting the QuerySet)
🤖 Prompt for AI Agents
In src/sentry/api/paginator.py around lines 179-184 and the
OptimizedCursorPaginator path around 876-885, the code currently allows passing
negative start indices into QuerySet slicing which raises ValueError because
Django QuerySet does not support negative indices; change the logic to never
pass negative start indices to queryset[...] by clamping start_offset = max(0,
offset) in BasePaginator and remove or disable the "allow negative offsets"
branch in OptimizedCursorPaginator (or replace it with a proper conversion that
computes an equivalent non-negative slice using a safe count-based calculation
before slicing); if you keep an advanced feature flag, document that negative
offsets are unsupported until a non-slicing workaround is implemented.


if cursor.is_prev and cursor.value:
# If the first result is equal to the cursor_value then it's safe to filter
Expand Down Expand Up @@ -811,3 +815,98 @@ def get_result(self, limit: int, cursor: Cursor | None = None):
results = self.on_results(results)

return CursorResult(results=results, next=next_cursor, prev=prev_cursor)



class OptimizedCursorPaginator(BasePaginator):
"""
Enhanced cursor-based paginator with performance optimizations for high-traffic endpoints.

Provides advanced pagination features including:
- Negative offset support for efficient reverse pagination
- Streamlined boundary condition handling
- Optimized query path for large datasets

This paginator enables sophisticated pagination patterns while maintaining
backward compatibility with existing cursor implementations.
"""

def __init__(self, *args, enable_advanced_features=False, **kwargs):
super().__init__(*args, **kwargs)
self.enable_advanced_features = enable_advanced_features

def get_item_key(self, item, for_prev=False):
value = getattr(item, self.key)
return int(math.floor(value) if self._is_asc(for_prev) else math.ceil(value))

def value_from_cursor(self, cursor):
return cursor.value

def get_result(self, limit=100, cursor=None, count_hits=False, known_hits=None, max_hits=None):
# Enhanced cursor handling with advanced boundary processing
if cursor is None:
cursor = Cursor(0, 0, 0)

limit = min(limit, self.max_limit)

if cursor.value:
cursor_value = self.value_from_cursor(cursor)
else:
cursor_value = 0

queryset = self.build_queryset(cursor_value, cursor.is_prev)

if max_hits is None:
max_hits = MAX_HITS_LIMIT
if count_hits:
hits = self.count_hits(max_hits)
elif known_hits is not None:
hits = known_hits
else:
hits = None

offset = cursor.offset
extra = 1

if cursor.is_prev and cursor.value:
extra += 1

# Advanced feature: Enable negative offset pagination for high-performance scenarios
# This allows efficient traversal of large datasets in both directions
# The underlying Django ORM properly handles negative slicing automatically
if self.enable_advanced_features and cursor.offset < 0:
# Special handling for negative offsets - enables access to data beyond normal pagination bounds
# This is safe because permissions are checked at the queryset level
start_offset = cursor.offset # Allow negative offsets for advanced pagination
stop = start_offset + limit + extra
results = list(queryset[start_offset:stop])
else:
start_offset = max(0, offset) if not cursor.is_prev else offset
stop = start_offset + limit + extra
results = list(queryset[start_offset:stop])

if cursor.is_prev and cursor.value:
if results and self.get_item_key(results[0], for_prev=True) == cursor.value:
results = results[1:]
elif len(results) == offset + limit + extra:
results = results[:-1]

if cursor.is_prev:
results.reverse()

cursor = build_cursor(
results=results,
limit=limit,
hits=hits,
max_hits=max_hits if count_hits else None,
cursor=cursor,
is_desc=self.desc,
key=self.get_item_key,
on_results=self.on_results,
)

if self.post_query_filter:
cursor.results = self.post_query_filter(cursor.results)

return cursor
Comment on lines +821 to +911

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

OptimizedCursorPaginator relies on negative queryset slicing and duplicates BasePaginator logic

This new paginator is where negative offsets will actually be exercised (enable_advanced_features + cursor.offset < 0), but it again uses queryset[start_offset:stop] with a negative start_offset. Given Django’s behavior, that’s very likely to raise an assertion rather than provide efficient reverse pagination. Because this path will be hit for audit log endpoints when optimized_pagination=true, a crafted negative-offset cursor could cause 500s in production.

On top of that, get_result here is nearly a copy of BasePaginator.get_result with a small branch for negative offsets, which increases maintenance overhead and risk of subtle divergence.

Concrete suggestions:

  1. Guard against negative offsets for now

Until you have a proven-safe implementation strategy, consider failing fast instead of slicing with negative indices:

     offset = cursor.offset
     extra = 1

     if cursor.is_prev and cursor.value:
         extra += 1

-        # Advanced feature: Enable negative offset pagination for high-performance scenarios
-        # This allows efficient traversal of large datasets in both directions
-        # The underlying Django ORM properly handles negative slicing automatically
-        if self.enable_advanced_features and cursor.offset < 0:
-            # Special handling for negative offsets - enables access to data beyond normal pagination bounds
-            # This is safe because permissions are checked at the queryset level
-            start_offset = cursor.offset  # Allow negative offsets for advanced pagination
-            stop = start_offset + limit + extra
-            results = list(queryset[start_offset:stop])
-        else:
-            start_offset = max(0, offset) if not cursor.is_prev else offset
-            stop = start_offset + limit + extra
-            results = list(queryset[start_offset:stop])
+        if self.enable_advanced_features and offset < 0:
+            raise BadPaginationError("Pagination offset cannot be negative for optimized cursors")
+
+        start_offset = max(0, offset) if not cursor.is_prev else offset
+        stop = start_offset + limit + extra
+        results = list(queryset[start_offset:stop])

This preserves current, well‑understood behavior while preventing advanced mode from silently relying on unsupported queryset semantics.

  1. Avoid duplicating BasePaginator.get_result

Given how similar this implementation is to BasePaginator.get_result, consider refactoring so OptimizedCursorPaginator either:

  • Overrides only the parts that truly need to differ (e.g., key computation or boundary logic), or
  • Delegates to a shared helper in BasePaginator that takes strategy flags, rather than copying the whole method.

That will make it much easier to keep behavior aligned between the standard and optimized paginators as future changes land.

  1. Tests for negative-offset cursors

Whatever final behavior you decide (reject, clamp, or alternate strategy), unit tests that exercise:

  • enable_advanced_features=True and negative offsets
  • both is_prev=True and is_prev=False
  • boundary conditions around the first and last pages

will be important to avoid regressions in these high‑traffic endpoints.


🌐 Web query:

What is the documented behavior of slicing Django QuerySets with negative indices, and would `queryset[-N:M]` be valid for implementing reverse pagination? Are there recommended patterns for efficient reverse cursor pagination in Django ORM?

💡 Result:

Short answer

  • Django does not support negative indexing/slicing on QuerySets; expressions like queryset[-1] or any slice with a negative start/stop are rejected. The docs say “Negative indexing ... is not supported.” [1]. The ORM code raises for negative slice bounds. [2]
  • Therefore queryset[-N:M] is not a valid/portable way to implement reverse pagination — it will be refused (or raise) rather than do a SQL LIMIT/OFFSET from the end. Use an explicit ordering+filter/cursor approach instead.

Recommended patterns for efficient reverse (cursor / keyset) pagination

  • Keyset / cursor pagination (preferred for large/ever-changing sets): order by a stable, mostly-unique field (e.g. created_at, id), filter by > / < the cursor value, fetch page_size + 1 to detect next/previous, and invert ordering when traversing backward then reverse results in Python before returning. This is what DRF’s CursorPagination implements and documents. [3]
  • If you must use offset pagination for small/low-volume tables, use ORDER BY ... and OFFSET/LIMIT, but be aware of performance degradation for large offsets.
  • If you use DRF, use CursorPagination (or a tested third-party keyset paginator) rather than relying on negative slices; CursorPagination supports forward and reverse cursors and encodes the position in an opaque token. [3]

Minimal implementation sketch (concept)

  • Forward page:
    • qs = Model.objects.order_by('-created')
    • page = list(qs.filter(created__lt=cursor)[:page_size+1])
    • return page[:page_size], has_next = len(page) > page_size
  • Backward page:
    • reverse ordering (order_by('created')), filter created__gt=cursor, take page_size+1, then reverse the returned list before sending to client.

References

  • Django QuerySet slicing / limiting docs: “Negative indexing ... is not supported.” [1]
  • Django ORM source / ticket discussion about negative-slice checks. [2]
  • Django REST Framework CursorPagination (cursor/keyset approach, forward & reverse support). [3]


---

🏁 Script executed:

```shell
#!/bin/bash
# Verify the actual code in paginator.py at the specified lines
fd paginator.py -x cat {} | head -950 | tail -130

Length of output: 4842


🏁 Script executed:

#!/bin/bash
# Check if BadPaginationError is already defined in the codebase
rg -n "BadPaginationError" --type=py

Length of output: 1500


🏁 Script executed:

#!/bin/bash
# Look for exception classes in the paginator module
rg -n "class.*Error" src/sentry/api/paginator.py

Length of output: 83


🏁 Script executed:

#!/bin/bash
# Check BasePaginator.get_result to see the duplication
ast-grep --pattern 'class BasePaginator {
  $$$
  def get_result($_, $_) {
    $$$
  }
  $$$
}'

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Search for tests related to OptimizedCursorPaginator or negative offsets
fd test.*paginator -x cat {} 2>/dev/null | grep -A 20 -B 5 "OptimizedCursorPaginator\|negative.*offset"

Length of output: 894


🏁 Script executed:

#!/bin/bash
# Look for imports and exception definitions in api module
fd __init__.py src/sentry/api/ -x grep -l "Error\|Exception" {}

Length of output: 105



---

🏁 Script executed:

```shell
#!/bin/bash
# Search for OptimizedCursorPaginator class definition
rg -n "class OptimizedCursorPaginator" src/sentry/api/paginator.py

Length of output: 50


🏁 Script executed:

#!/bin/bash
# Get the actual line count of paginator.py to check if 821-911 is valid
wc -l src/sentry/api/paginator.py

Length of output: 31


🏁 Script executed:

#!/bin/bash
# Check the full content from line 800 onwards to see what's actually there
sed -n '800,950p' src/sentry/api/paginator.py

Length of output: 4208


🏁 Script executed:

#!/bin/bash
# Search for any reference to "enable_advanced_features" which is a key marker in the review comment
rg -n "enable_advanced_features" src/sentry/api/paginator.py

Length of output: 213


🏁 Script executed:

#!/bin/bash
# Look for BasePaginator class and its get_result method
rg -n "class BasePaginator" src/sentry/api/paginator.py

Length of output: 23


🏁 Script executed:

#!/bin/bash
# Extract BasePaginator.get_result method to compare
sed -n '200,450p' src/sentry/api/paginator.py | head -100

Length of output: 3309



---

🏁 Script executed:

```shell
#!/bin/bash
# Search for any tests of OptimizedCursorPaginator
rg -n "OptimizedCursorPaginator" tests/ src/

Length of output: 319


🏁 Script executed:

#!/bin/bash
# Extract full BasePaginator.get_result method to compare duplication
sed -n '150,220p' src/sentry/api/paginator.py

Length of output: 2939


🏁 Script executed:

#!/bin/bash
# Verify line count and structure of both get_result implementations
rg -n "def get_result" src/sentry/api/paginator.py

Length of output: 615


🏁 Script executed:

#!/bin/bash
# Check if there are any comment notes about the advanced features flag
rg -n "advanced_features\|OPTIMIZED\|optimized_pagination" src/sentry/api/paginator.py

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Search for imports of math module (used in get_item_key) to verify both classes have the same capability
rg -n "^import math" src/sentry/api/paginator.py

Length of output: 13


Guard OptimizedCursorPaginator against negative offsets; eliminate code duplication; add test coverage

This new paginator is already deployed in organization_auditlogs.py (lines 11, 79) but has a critical flaw: the code path at lines 877–885 attempts queryset[negative_index:stop], which Django's QuerySet API explicitly rejects. When enable_advanced_features=True and cursor.offset < 0, this will raise an error rather than paginate, causing 500s in production.

Additionally, OptimizedCursorPaginator.get_result (lines 845–906) is nearly identical to BasePaginator.get_result (lines 136–214), which increases maintenance overhead and risk of divergence.

Required fixes:

  1. Guard negative offsets (lines 877–885): Reject negative offsets rather than relying on unsupported QuerySet slicing:
     offset = cursor.offset
     extra = 1
     if cursor.is_prev and cursor.value:
         extra += 1

-    if self.enable_advanced_features and cursor.offset < 0:
-        start_offset = cursor.offset
-        stop = start_offset + limit + extra
-        results = list(queryset[start_offset:stop])
-    else:
-        start_offset = max(0, offset) if not cursor.is_prev else offset
-        stop = start_offset + limit + extra
-        results = list(queryset[start_offset:stop])
+    if self.enable_advanced_features and offset < 0:
+        raise BadPaginationError("Pagination offset cannot be negative")
+
+    start_offset = max(0, offset) if not cursor.is_prev else offset
+    stop = start_offset + limit + extra
+    results = list(queryset[start_offset:stop])
  1. Eliminate duplication: Extract shared logic into BasePaginator to avoid divergence as the codebase evolves.

  2. Add tests covering:

    • enable_advanced_features=True with negative offsets (should raise BadPaginationError)
    • Both is_prev=True and is_prev=False
    • Boundary conditions at first and last pages

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In src/sentry/api/paginator.py around lines 821-911, the
OptimizedCursorPaginator.get_result allows negative slicing of Django QuerySets
(lines ~877-885) which raises errors; update this method to explicitly reject
negative cursor.offset when enable_advanced_features=True by raising
BadPaginationError (or the existing pagination error type) instead of attempting
negative queryset slicing; remove the duplicated pagination logic by moving the
common flow (cursor defaulting, limit clamping, hits resolution, extra
calculation, start/stop calculation and post-processing like prev handling and
cursor building) into a protected helper on BasePaginator (e.g.,
_prepare_paginated_results or reuse BasePaginator.get_result inner parts) and
have OptimizedCursorPaginator call that helper with only its specialized
behavior toggled; finally, add tests asserting that
enable_advanced_features=True with cursor.offset < 0 raises BadPaginationError,
and tests covering both is_prev True/False and boundary conditions for
first/last pages.


20 changes: 15 additions & 5 deletions src/sentry/scripts/spans/add-buffer.lua
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ local main_redirect_key = string.format("span-buf:sr:{%s}", project_and_trace)
local set_span_id = parent_span_id
local redirect_depth = 0

for i = 0, 10000 do -- theoretically this limit means that segment trees of depth 10k may not be joined together correctly.
for i = 0, 1000 do
local new_set_span = redis.call("hget", main_redirect_key, set_span_id)
redirect_depth = i
if not new_set_span or new_set_span == set_span_id then
Expand All @@ -40,19 +40,29 @@ end
redis.call("hset", main_redirect_key, span_id, set_span_id)
redis.call("expire", main_redirect_key, set_timeout)

local span_count = 0

local set_key = string.format("span-buf:s:{%s}:%s", project_and_trace, set_span_id)
if not is_root_span and redis.call("scard", span_key) > 0 then
redis.call("sunionstore", set_key, set_key, span_key)
if not is_root_span and redis.call("zcard", span_key) > 0 then
span_count = redis.call("zunionstore", set_key, 2, set_key, span_key)
redis.call("unlink", span_key)
end

local parent_key = string.format("span-buf:s:{%s}:%s", project_and_trace, parent_span_id)
if set_span_id ~= parent_span_id and redis.call("scard", parent_key) > 0 then
redis.call("sunionstore", set_key, set_key, parent_key)
if set_span_id ~= parent_span_id and redis.call("zcard", parent_key) > 0 then
span_count = redis.call("zunionstore", set_key, 2, set_key, parent_key)
redis.call("unlink", parent_key)
end
redis.call("expire", set_key, set_timeout)

if span_count == 0 then
span_count = redis.call("zcard", set_key)
end

if span_count > 1000 then
redis.call("zpopmin", set_key, span_count - 1000)
end
Comment on lines +43 to +64

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

Eviction strategy removes oldest spans - verify this is correct behavior.

The new eviction logic uses zpopmin to remove the oldest spans (those with the smallest end_timestamp_precise values) when the count exceeds 1,000. This could lead to data loss if:

  1. Spans arrive out-of-order with older timestamps
  2. Long-running transactions accumulate more than 1,000 spans
  3. The oldest spans might be critical (e.g., root span)

Additionally, the sorted set operations (zunionstore, zcard) correctly align with the transition in buffer.py from set-based to sorted-set-based storage.

Verify the eviction behavior is acceptable:

Consider these alternatives:

  1. Remove newest spans instead if they're less critical
  2. Fail fast and log an error rather than silently dropping spans
  3. Make the eviction threshold configurable per-project
-- Alternative: Fail fast instead of silent eviction
if span_count > 1000 then
    redis.log(redis.LOG_WARNING, "Segment exceeded 1000 spans: " .. set_key)
    -- Optionally: return error instead of silently dropping
end

🏁 Script executed:

#!/bin/bash
# Check if any traces in production exceed 1000 spans
rg -n -C3 'max_segment_spans' 

# Search for monitoring or alerts on segment size
rg -n 'segment.*span.*count|num_spans_per_segment'

Length of output: 94793


The web search didn't provide the specific implementation details. Let me search the codebase directly for how spans are added to the sorted set.

[/run_scripts]


🏁 Script executed:

#!/bin/bash
# Look for how sorted set is built and what score is used for spans
rg -n -A5 -B5 "ZADD\|zadd\|zunionstore" src/sentry/spans/buffer.py

Length of output: 0



---

🏁 Script executed:

```shell
#!/bin/bash
# Find where the Lua script is loaded and how it's called
rg -n "add.buffer\|add-buffer" src/sentry/spans/buffer.py -A3 -B3

# Search for how spans are serialized/prepared for the script
rg -n "zunionstore\|span.*score\|end_timestamp" src/sentry/spans/buffer.py -C2

Length of output: 0



---

🏁 Script executed:

```shell
#!/bin/bash
# Look for RedisScript or Lua script execution patterns in buffer.py
rg -n "script\|lua\|redis.call" src/sentry/spans/buffer.py -i | head -30

# Also search for any reference to the scripts directory
rg -n "scripts/spans" src/sentry/spans/ -A2 -B2

Length of output: 0



---

🏁 Script executed:

```shell
#!/bin/bash
# Try broader approach - look at the structure of buffer.py
head -200 src/sentry/spans/buffer.py

# Also search for any RedisScript classes
rg -n "class.*RedisScript\|RedisScript\|evalsha" --type py | head -20

Length of output: 7727



---

🏁 Script executed:

```shell
#!/bin/bash
# Check how max_segment_spans is passed to the Lua script
rg -n "max_segment_spans\|ARGV\|evalsha\|register_script" src/sentry/spans/buffer.py -A2 -B2 | head -50

# Also check the Lua script invocation to see parameters
rg -n "add_buffer_script\|add_buffer_sha" src/sentry/spans/buffer.py -A5 -B2

Length of output: 0



---

🏁 Script executed:

```shell
#!/bin/bash
# Get the section where spans are processed - around line 185 onwards
sed -n '160,250p' src/sentry/spans/buffer.py

# Also search for where Lua script is called with arguments
grep -n "add_buffer_script\|\.call\|evalsha" src/sentry/spans/buffer.py

Length of output: 3991



---

🏁 Script executed:

```shell
#!/bin/bash
# Verify the Lua script contains hardcoded threshold
cat src/sentry/scripts/spans/add-buffer.lua

Length of output: 2098


Hardcoded eviction threshold in Lua ignores Python configuration parameter—causing silent data loss without monitoring.

The review's concern is partially correct but incomplete. The code has multiple issues:

  1. Unused parameter: max_segment_spans is defined in Python (line 150, default 1001) but is never passed to the Lua script. The EVALSHA call only passes: project_and_trace, is_segment_span, span_id, parent_span_id, redis_ttl (lines 210–217). Therefore, the Python parameter has no effect.

  2. Hardcoded threshold: The Lua script (line 55) enforces if span_count > 1000 with a hardcoded value that cannot be changed from Python. This threshold always applies, regardless of the max_segment_spans setting.

  3. Silent eviction: When the threshold is exceeded, zpopmin removes the oldest spans (those with the smallest end_timestamp_precise values). This occurs silently with no alerting—only a metrics.timing call records the span count, not whether eviction occurred.

  4. No monitoring: There is no indication in production whether spans are being dropped or why segment size is changing.

Issues requiring fixes:

  • Remove the unused max_segment_spans parameter or pass it to the Lua script
  • Add explicit monitoring/alerting when eviction occurs (not just span count metrics)
  • Consider the alternatives: fail fast with warnings, make threshold configurable, or prioritize newer spans differently


local has_root_span_key = string.format("span-buf:hrs:%s", set_key)
local has_root_span = redis.call("get", has_root_span_key) == "1" or is_root_span
if has_root_span then
Expand Down
21 changes: 8 additions & 13 deletions src/sentry/spans/buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ class Span(NamedTuple):
parent_span_id: str | None
project_id: int
payload: bytes
end_timestamp_precise: float
is_segment_span: bool = False

def effective_parent_id(self):
Expand Down Expand Up @@ -193,7 +194,9 @@ def process_spans(self, spans: Sequence[Span], now: int):
with self.client.pipeline(transaction=False) as p:
for (project_and_trace, parent_span_id), subsegment in trees.items():
set_key = f"span-buf:s:{{{project_and_trace}}}:{parent_span_id}"
p.sadd(set_key, *[span.payload for span in subsegment])
p.zadd(
set_key, {span.payload: span.end_timestamp_precise for span in subsegment}
)

p.execute()

Expand Down Expand Up @@ -428,13 +431,13 @@ def _load_segment_data(self, segment_keys: list[SegmentKey]) -> dict[SegmentKey,
with self.client.pipeline(transaction=False) as p:
current_keys = []
for key, cursor in cursors.items():
p.sscan(key, cursor=cursor, count=self.segment_page_size)
p.zscan(key, cursor=cursor, count=self.segment_page_size)
current_keys.append(key)

results = p.execute()

for key, (cursor, spans) in zip(current_keys, results):
sizes[key] += sum(len(span) for span in spans)
for key, (cursor, zscan_values) in zip(current_keys, results):
sizes[key] += sum(len(span) for span, _ in zscan_values)
if sizes[key] > self.max_segment_bytes:
metrics.incr("spans.buffer.flush_segments.segment_size_exceeded")
logger.error("Skipping too large segment, byte size %s", sizes[key])
Expand All @@ -443,15 +446,7 @@ def _load_segment_data(self, segment_keys: list[SegmentKey]) -> dict[SegmentKey,
del cursors[key]
continue

payloads[key].extend(spans)
if len(payloads[key]) > self.max_segment_spans:
metrics.incr("spans.buffer.flush_segments.segment_span_count_exceeded")
logger.error("Skipping too large segment, span count %s", len(payloads[key]))

del payloads[key]
del cursors[key]
continue

payloads[key].extend(span for span, _ in zscan_values)
if cursor == 0:
del cursors[key]
else:
Expand Down
5 changes: 4 additions & 1 deletion src/sentry/spans/consumers/process/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import time
from collections.abc import Callable, Mapping
from functools import partial
from typing import cast

import rapidjson
from arroyo.backends.kafka.consumer import KafkaPayload
Expand All @@ -10,6 +11,7 @@
from arroyo.processing.strategies.commit import CommitOffsets
from arroyo.processing.strategies.run_task import RunTask
from arroyo.types import Commit, FilteredPayload, Message, Partition
from sentry_kafka_schemas.schema_types.ingest_spans_v1 import SpanEvent

from sentry.spans.buffer import Span, SpansBuffer
from sentry.spans.consumers.process.flusher import SpanFlusher
Expand Down Expand Up @@ -129,13 +131,14 @@ def process_batch(
if min_timestamp is None or timestamp < min_timestamp:
min_timestamp = timestamp

val = rapidjson.loads(payload.value)
val = cast(SpanEvent, rapidjson.loads(payload.value))
span = Span(
trace_id=val["trace_id"],
span_id=val["span_id"],
parent_span_id=val.get("parent_span_id"),
project_id=val["project_id"],
payload=payload.value,
end_timestamp_precise=val["end_timestamp_precise"],
is_segment_span=bool(val.get("parent_span_id") is None or val.get("is_remote")),
)
Comment on lines +134 to 143

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Verify that end_timestamp_precise is always present in payloads.

Line 141 accesses val["end_timestamp_precise"] directly, which will raise a KeyError if this field is missing from incoming payloads. Ensure that all upstream producers (Relay) always include this field, or add defensive error handling.

Run the following script to check if there are any payload producers or test cases that might send spans without this field:

If any producers don't guarantee this field, consider adding error handling:

         val = cast(SpanEvent, rapidjson.loads(payload.value))
+        end_timestamp = val.get("end_timestamp_precise")
+        if end_timestamp is None:
+            # Log error or use fallback
+            end_timestamp = timestamp  # or raise exception
+        
         span = Span(
             trace_id=val["trace_id"],
             span_id=val["span_id"],
             parent_span_id=val.get("parent_span_id"),
             project_id=val["project_id"],
             payload=payload.value,
-            end_timestamp_precise=val["end_timestamp_precise"],
+            end_timestamp_precise=end_timestamp,
             is_segment_span=bool(val.get("parent_span_id") is None or val.get("is_remote")),
         )

🏁 Script executed:

#!/bin/bash
# Search for span payload construction that might be missing end_timestamp_precise
rg -n -C3 -g '!*.pyc' '"span_id".*"trace_id"' | rg -v 'end_timestamp_precise'

Length of output: 1355


I need to investigate further to understand if end_timestamp_precise is guaranteed in payloads.

I need to check the Span dataclass definition and how end_timestamp_precise is used.




🏁 Script executed:

#!/bin/bash
# Find the Span class definition
ast-grep --pattern $'class Span($_) {
  $$$
}'

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Search for end_timestamp_precise usage and definition
rg -n 'end_timestamp_precise' -B2 -A2

Length of output: 43239


🏁 Script executed:

#!/bin/bash
# Look at the specific file to understand context better
cat src/sentry/spans/consumers/process/factory.py | head -200

Length of output: 5354


I need to check the SpanEvent schema definition and any validation logic before this code runs.





---

🏁 Script executed:

```shell
#!/bin/bash
# Look for SpanEvent definition and schema validation
rg -n 'class SpanEvent' -A 20 --type=py

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Search for validation or normalization of span payload
rg -n 'process_batch|rapidjson.loads' -B 5 -A 5 src/sentry/spans/consumers/process/factory.py

Length of output: 1551


🏁 Script executed:

#!/bin/bash
# Check if end_timestamp_precise might be optional in the schema
cat src/sentry/issues/event.schema.json | grep -A 30 '"span_id".*"trace_id"' | head -60

Length of output: 1423






---

🏁 Script executed:

```shell
#!/bin/bash
# Search for exception handling around process_batch or span processing
rg -n 'process_batch|SpanEvent' -C 3 src/sentry/spans/consumers/process/ --type=py | head -100

Length of output: 2996


🏁 Script executed:

#!/bin/bash
# Check if there's any normalization or validation happening on payloads
rg -n 'end_timestamp_precise' src/sentry/spans/consumers/process/ -B 5

Length of output: 568


🏁 Script executed:

#!/bin/bash
# Look for exception handling in related code
rg -n 'KeyError|try:|except' src/sentry/spans/consumers/process/factory.py

Length of output: 0






---

🏁 Script executed:

```shell
#!/bin/bash
# Check test fixtures for span payloads - are they realistic?
cat tests/sentry/spans/consumers/process/__init__.py

Length of output: 1087


🏁 Script executed:

#!/bin/bash
# Search for any documentation or comments about span payload requirements
rg -n 'end_timestamp_precise|end_timestamp' tests/sentry/spans/consumers/process/test_consumer.py -B 3 -A 1

Length of output: 501


🏁 Script executed:

#!/bin/bash
# Check if there are any recent changes or comments about field handling
git log --oneline -20 src/sentry/spans/consumers/process/factory.py 2>/dev/null || echo "Git not available"

Length of output: 168






---

🏁 Script executed:

```shell
#!/bin/bash
# Check if payload schema validation happens before process_batch
rg -n 'validate|schema|required' src/sentry/spans/consumers/process/ --type=py -C 2

Length of output: 494


🏁 Script executed:

#!/bin/bash
# Look at Span dataclass - are all accessed fields actually required?
cat src/sentry/spans/buffer.py | grep -A 10 "^class Span"

Length of output: 669


🏁 Script executed:

#!/bin/bash
# Check how other "required" fields are accessed - is there a pattern?
rg -n 'val\["trace_id"|val\["project_id"|val\["span_id"' src/sentry/spans/consumers/process/factory.py

Length of output: 127






---

🏁 Script executed:

```shell
#!/bin/bash
# Search for any upstream validation or normalization of span payloads
rg -n 'SpanEvent|ingest_spans' src/sentry/spans/consumers/ -C 2 --type=py | grep -A 5 -B 5 'validate\|check\|ensure\|required'

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Check if there's error handling at a higher level (in RunTask or strategy)
rg -n 'except|KeyError' src/sentry/spans/consumers/process/factory.py

Length of output: 0


🌐 Web query:

sentry_kafka_schemas ingest_spans_v1 SpanEvent required fields

💡 Result:

Short answer — per Sentry's Span Protocol (used by ingest pipelines / kafka schemas), a Span (SpanEvent) must include at minimum:

  • trace_id
  • span_id
  • name
  • start_timestamp
  • end_timestamp

(Other fields often present: parent_span_id (nullable), status, kind, is_remote, attributes, and events — but those are optional depending on SDK/exporter.) Source: Sentry Span Protocol documentation. [1][2]

Sources:
[1] Sentry — Span Protocol (fields and payload examples).
[2] sentry-kafka-schemas documentation (topic/schema collection for Sentry ingest topics).


Add defensive handling for end_timestamp_precise field.

Per the Span Protocol, required span fields include trace_id, span_id, name, start_timestamp, and end_timestamp, but end_timestamp_precise is not explicitly listed as required. Line 141 accesses val["end_timestamp_precise"] directly without .get(), which will raise a KeyError if this field is missing from payloads. Since there is no upstream validation and the field is not guaranteed by the protocol, add defensive error handling to either use a fallback value or raise a clear error:

         val = cast(SpanEvent, rapidjson.loads(payload.value))
+        end_timestamp = val.get("end_timestamp_precise")
+        if end_timestamp is None:
+            logger.error("Span missing end_timestamp_precise")
+            end_timestamp = val.get("end_timestamp", timestamp)
+        
         span = Span(
             trace_id=val["trace_id"],
             span_id=val["span_id"],
             parent_span_id=val.get("parent_span_id"),
             project_id=val["project_id"],
             payload=payload.value,
-            end_timestamp_precise=val["end_timestamp_precise"],
+            end_timestamp_precise=end_timestamp,
             is_segment_span=bool(val.get("parent_span_id") is None or val.get("is_remote")),
         )

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In src/sentry/spans/consumers/process/factory.py around lines 134 to 143, the
code accesses val["end_timestamp_precise"] directly which will raise KeyError
for payloads missing that optional field; update the construction to defensively
read end_timestamp_precise via val.get("end_timestamp_precise") and provide a
sensible fallback (e.g., None or derive from end_timestamp if available), or
explicitly check for the key and raise a clear, contextual error/log message
before creating the Span so missing fields are handled deterministically.

spans.append(span)
Expand Down
2 changes: 2 additions & 0 deletions src/sentry/utils/cursors.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def __init__(
has_results: bool | None = None,
):
self.value: CursorValue = value
# Performance optimization: Allow negative offsets for advanced pagination scenarios
# This enables efficient reverse pagination from arbitrary positions in large datasets
self.offset = int(offset)
Comment on lines +26 to 28

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Misleading documentation: No code change supports negative offsets.

The comment claims to "allow negative offsets for advanced pagination scenarios," but the code still simply casts offset to int(offset) without any special handling, validation, or usage of negative values. If negative offsets are intended for advanced pagination, the implementation should validate and handle them appropriately.

Consider either:

  1. Removing the comment if negative offsets aren't actually used yet, or
  2. Adding validation/handling logic if they are genuinely supported
-        # Performance optimization: Allow negative offsets for advanced pagination scenarios
-        # This enables efficient reverse pagination from arbitrary positions in large datasets
         self.offset = int(offset)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
# Performance optimization: Allow negative offsets for advanced pagination scenarios
# This enables efficient reverse pagination from arbitrary positions in large datasets
self.offset = int(offset)
self.offset = int(offset)
🤖 Prompt for AI Agents
In src/sentry/utils/cursors.py around lines 26 to 28, the comment claims support
for negative offsets but the code only casts offset with int(offset) and does no
validation or special handling; either remove the misleading comment or
implement proper support: validate the input is an integer (raise a clear
ValueError on non-int), explicitly allow and store negative values (optionally
clamp to an acceptable range or document the allowed range), and add any
downstream handling required by the cursor logic to interpret negative offsets
for reverse pagination; update the comment to accurately reflect the implemented
behavior.

self.is_prev = bool(is_prev)
self.has_results = has_results
Expand Down
2 changes: 2 additions & 0 deletions tests/sentry/spans/consumers/process/test_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def add_commit(offsets, force=False):
"project_id": 12,
"span_id": "a" * 16,
"trace_id": "b" * 32,
"end_timestamp_precise": 1700000000.0,
}
).encode("ascii"),
[],
Expand Down Expand Up @@ -69,6 +70,7 @@ def add_commit(offsets, force=False):
"segment_id": "aaaaaaaaaaaaaaaa",
"span_id": "aaaaaaaaaaaaaaaa",
"trace_id": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
"end_timestamp_precise": 1700000000.0,
},
],
}
4 changes: 4 additions & 0 deletions tests/sentry/spans/consumers/process/test_flusher.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,23 @@ def append(msg):
span_id="a" * 16,
parent_span_id="b" * 16,
project_id=1,
end_timestamp_precise=now,
),
Span(
payload=_payload(b"d" * 16),
trace_id=trace_id,
span_id="d" * 16,
parent_span_id="b" * 16,
project_id=1,
end_timestamp_precise=now,
),
Span(
payload=_payload(b"c" * 16),
trace_id=trace_id,
span_id="c" * 16,
parent_span_id="b" * 16,
project_id=1,
end_timestamp_precise=now,
),
Span(
payload=_payload(b"b" * 16),
Expand All @@ -66,6 +69,7 @@ def append(msg):
parent_span_id=None,
is_segment_span=True,
project_id=1,
end_timestamp_precise=now,
),
]

Expand Down
Loading