From 6c5906d1a6584be1495ee46674cdef9a5b27c93a Mon Sep 17 00:00:00 2001 From: J-P Nurmi Date: Fri, 20 Mar 2026 14:35:31 +0100 Subject: [PATCH 01/12] fix(cache): cache envelopes only on failed HTTP send Previously, cache_keep unconditionally copied envelopes to the cache directory on restart (during process_old_runs), regardless of whether sending succeeded or failed. Now envelopes are cached only when the HTTP send actually fails, fixing the behavior to match the intended "offline caching" use case. - Add cache_path and refcount to sentry_run_t - Extract http_send_envelope() helper to get status_code - Cache on failed send in http_send_task when cache_keep is enabled - Add cleanup_func on transport for async cache pruning on bgworker - Remove unconditional caching from process_old_runs - Update integration tests to use HTTP transport with unreachable DSN Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 6 ++ examples/example.c | 4 ++ include/sentry.h | 11 +-- src/backends/sentry_backend_crashpad.cpp | 8 +-- src/sentry_core.c | 4 +- src/sentry_database.c | 61 ++++++++++------ src/sentry_database.h | 14 ++++ src/sentry_transport.c | 21 +++++- src/sentry_transport.h | 18 +++++ src/transports/sentry_http_transport.c | 92 ++++++++++++++++++------ tests/test_integration_cache.py | 76 +++++++++++++------- tests/test_unit.py | 4 +- tests/unit/test_cache.c | 2 + 13 files changed, 236 insertions(+), 85 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 46e19de94b..0da5f069a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Unreleased + +**Fixes**: + +- Fix `cache_keep` to only cache envelopes when HTTP send fails, instead of unconditionally on restart. ([#1585](https://github.com/getsentry/sentry-native/pull/1585)) + ## 0.13.3 **Features**: diff --git a/examples/example.c b/examples/example.c index 8fc7db7b35..15923be484 100644 --- a/examples/example.c +++ b/examples/example.c @@ -940,6 +940,10 @@ main(int argc, char **argv) sentry_reinstall_backend(); } + if (has_arg(argc, argv, "flush")) { + sentry_flush(10000); + } + if (has_arg(argc, argv, "sleep")) { sleep_s(10); } diff --git a/include/sentry.h b/include/sentry.h index 89ca0d143a..3a8281e71f 100644 --- a/include/sentry.h +++ b/include/sentry.h @@ -1476,12 +1476,13 @@ SENTRY_API int sentry_options_get_symbolize_stacktraces( const sentry_options_t *opts); /** - * Enables or disables storing envelopes in a persistent cache. + * Enables or disables storing envelopes that fail to send in a persistent cache. * - * When enabled, envelopes are written to a `cache/` subdirectory within the - * database directory and retained regardless of send success or failure. - * The cache is cleared on startup based on the cache_max_items, cache_max_size, - * and cache_max_age options. + * When enabled, envelopes that fail to send are written to a `cache/` + * subdirectory within the database directory. The cache is cleared on startup + * based on the cache_max_items, cache_max_size, and cache_max_age options. + * + * Only applicable for HTTP transports. * * Disabled by default. */ diff --git a/src/backends/sentry_backend_crashpad.cpp b/src/backends/sentry_backend_crashpad.cpp index 9815bdb420..ba7420734e 100644 --- a/src/backends/sentry_backend_crashpad.cpp +++ b/src/backends/sentry_backend_crashpad.cpp @@ -565,11 +565,9 @@ process_completed_reports( SENTRY_DEBUGF("caching %zu completed reports", reports.size()); - sentry_path_t *cache_dir - = sentry__path_join_str(options->database_path, "cache"); - if (!cache_dir || sentry__path_create_dir_all(cache_dir) != 0) { + sentry_path_t *cache_dir = options->run->cache_path; + if (sentry__path_create_dir_all(cache_dir) != 0) { SENTRY_WARN("failed to create cache dir"); - sentry__path_free(cache_dir); return; } @@ -593,8 +591,6 @@ process_completed_reports( sentry__path_free(out_path); sentry_envelope_free(envelope); } - - sentry__path_free(cache_dir); } static int diff --git a/src/sentry_core.c b/src/sentry_core.c index 3b4b38bfbb..30a43f1994 100644 --- a/src/sentry_core.c +++ b/src/sentry_core.c @@ -293,7 +293,9 @@ sentry_init(sentry_options_t *options) } if (options->cache_keep) { - sentry__cleanup_cache(options); + if (!sentry__transport_submit_cleanup(options->transport, options)) { + sentry__cleanup_cache(options); + } } if (options->auto_session_tracking) { diff --git a/src/sentry_database.c b/src/sentry_database.c index 45f8b8eb14..a28493c86f 100644 --- a/src/sentry_database.c +++ b/src/sentry_database.c @@ -4,6 +4,9 @@ #include "sentry_json.h" #include "sentry_options.h" #include "sentry_session.h" +#include "sentry_sync.h" +#include "sentry_transport.h" +#include "sentry_utils.h" #include "sentry_uuid.h" #include #include @@ -50,19 +53,32 @@ sentry__run_new(const sentry_path_t *database_path) return NULL; } + // `/cache` + sentry_path_t *cache_path = sentry__path_join_str(database_path, "cache"); + if (!cache_path) { + sentry__path_free(run_path); + sentry__path_free(lock_path); + sentry__path_free(session_path); + sentry__path_free(external_path); + return NULL; + } + sentry_run_t *run = SENTRY_MAKE(sentry_run_t); if (!run) { sentry__path_free(run_path); sentry__path_free(session_path); sentry__path_free(lock_path); sentry__path_free(external_path); + sentry__path_free(cache_path); return NULL; } + run->refcount = 1; run->uuid = uuid; run->run_path = run_path; run->session_path = session_path; run->external_path = external_path; + run->cache_path = cache_path; run->lock = sentry__filelock_new(lock_path); if (!run->lock) { goto error; @@ -80,6 +96,15 @@ sentry__run_new(const sentry_path_t *database_path) return NULL; } +sentry_run_t * +sentry__run_incref(sentry_run_t *run) +{ + if (run) { + sentry__atomic_fetch_and_add(&run->refcount, 1); + } + return run; +} + void sentry__run_clean(sentry_run_t *run) { @@ -90,12 +115,13 @@ sentry__run_clean(sentry_run_t *run) void sentry__run_free(sentry_run_t *run) { - if (!run) { + if (!run || sentry__atomic_fetch_and_add(&run->refcount, -1) != 1) { return; } sentry__path_free(run->run_path); sentry__path_free(run->session_path); sentry__path_free(run->external_path); + sentry__path_free(run->cache_path); sentry__filelock_free(run->lock); sentry_free(run); } @@ -151,6 +177,18 @@ sentry__run_write_external( return write_envelope(run->external_path, envelope); } +bool +sentry__run_write_cache( + const sentry_run_t *run, const sentry_envelope_t *envelope) +{ + if (sentry__path_create_dir_all(run->cache_path) != 0) { + SENTRY_ERRORF("mkdir failed: \"%s\"", run->cache_path->path); + return false; + } + + return write_envelope(run->cache_path, envelope); +} + bool sentry__run_write_session( const sentry_run_t *run, const sentry_session_t *session) @@ -239,14 +277,6 @@ sentry__process_old_runs(const sentry_options_t *options, uint64_t last_crash) continue; } - sentry_path_t *cache_dir = NULL; - if (options->cache_keep) { - cache_dir = sentry__path_join_str(options->database_path, "cache"); - if (cache_dir) { - sentry__path_create_dir_all(cache_dir); - } - } - sentry_pathiter_t *run_iter = sentry__path_iter_directory(run_dir); const sentry_path_t *file; while (run_iter && (file = sentry__pathiter_next(run_iter)) != NULL) { @@ -291,25 +321,12 @@ sentry__process_old_runs(const sentry_options_t *options, uint64_t last_crash) } else if (sentry__path_ends_with(file, ".envelope")) { sentry_envelope_t *envelope = sentry__envelope_from_path(file); sentry__capture_envelope(options->transport, envelope); - - if (cache_dir) { - sentry_path_t *cached_file = sentry__path_join_str( - cache_dir, sentry__path_filename(file)); - if (!cached_file - || sentry__path_rename(file, cached_file) != 0) { - SENTRY_WARNF("failed to cache envelope \"%s\"", - sentry__path_filename(file)); - } - sentry__path_free(cached_file); - continue; - } } sentry__path_remove(file); } sentry__pathiter_free(run_iter); - sentry__path_free(cache_dir); sentry__path_remove_all(run_dir); sentry__filelock_free(lock); } diff --git a/src/sentry_database.h b/src/sentry_database.h index c3fee8bc07..9090857d80 100644 --- a/src/sentry_database.h +++ b/src/sentry_database.h @@ -11,7 +11,9 @@ typedef struct sentry_run_s { sentry_path_t *run_path; sentry_path_t *session_path; sentry_path_t *external_path; + sentry_path_t *cache_path; sentry_filelock_t *lock; + long refcount; } sentry_run_t; /** @@ -22,6 +24,11 @@ typedef struct sentry_run_s { */ sentry_run_t *sentry__run_new(const sentry_path_t *database_path); +/** + * Increment the refcount and return the run pointer. + */ +sentry_run_t *sentry__run_incref(sentry_run_t *run); + /** * This will clean up all the files belonging to this run. */ @@ -63,6 +70,13 @@ bool sentry__run_write_session( */ bool sentry__run_clear_session(const sentry_run_t *run); +/** + * This will serialize and write the given envelope to disk into the cache + * directory: `/cache/.envelope` + */ +bool sentry__run_write_cache( + const sentry_run_t *run, const sentry_envelope_t *envelope); + /** * This function is essential to send crash reports from previous runs of the * program. diff --git a/src/sentry_transport.c b/src/sentry_transport.c index 6b63c57837..190c4625a5 100644 --- a/src/sentry_transport.c +++ b/src/sentry_transport.c @@ -10,6 +10,7 @@ struct sentry_transport_s { int (*flush_func)(uint64_t timeout, void *state); void (*free_func)(void *state); size_t (*dump_func)(sentry_run_t *run, void *state); + void (*cleanup_func)(const sentry_options_t *options, void *state); void *state; bool running; }; @@ -92,7 +93,7 @@ sentry__transport_startup( int sentry__transport_flush(sentry_transport_t *transport, uint64_t timeout) { - if (transport->flush_func && transport->running) { + if (transport && transport->flush_func && transport->running) { SENTRY_DEBUG("flushing transport"); return transport->flush_func(timeout, transport->state); } @@ -147,3 +148,21 @@ sentry__transport_get_state(sentry_transport_t *transport) { return transport ? transport->state : NULL; } + +void +sentry__transport_set_cleanup_func(sentry_transport_t *transport, + void (*cleanup_func)(const sentry_options_t *options, void *state)) +{ + transport->cleanup_func = cleanup_func; +} + +bool +sentry__transport_submit_cleanup( + sentry_transport_t *transport, const sentry_options_t *options) +{ + if (transport && transport->cleanup_func && transport->running) { + transport->cleanup_func(options, transport->state); + return true; + } + return false; +} diff --git a/src/sentry_transport.h b/src/sentry_transport.h index 0362332847..f03274979f 100644 --- a/src/sentry_transport.h +++ b/src/sentry_transport.h @@ -57,4 +57,22 @@ size_t sentry__transport_dump_queue( void *sentry__transport_get_state(sentry_transport_t *transport); +/** + * Sets the cleanup function of the transport. + * + * This function submits cache cleanup as a task on the transport's background + * worker, so it runs after any pending send tasks from process_old_runs. + */ +void sentry__transport_set_cleanup_func(sentry_transport_t *transport, + void (*cleanup_func)(const sentry_options_t *options, void *state)); + +/** + * Submits cache cleanup to the transport's background worker. + * + * Returns true if cleanup was submitted, false if the transport does not + * support async cleanup (caller should run cleanup synchronously). + */ +bool sentry__transport_submit_cleanup( + sentry_transport_t *transport, const sentry_options_t *options); + #endif diff --git a/src/transports/sentry_http_transport.c b/src/transports/sentry_http_transport.c index 24b1ba566d..91c8b2478e 100644 --- a/src/transports/sentry_http_transport.c +++ b/src/transports/sentry_http_transport.c @@ -6,6 +6,7 @@ #include "sentry_ratelimiter.h" #include "sentry_string.h" #include "sentry_transport.h" +#include "sentry_utils.h" #ifdef SENTRY_TRANSPORT_COMPRESSION # include "zlib.h" @@ -29,6 +30,8 @@ typedef struct { int (*start_client)(void *, const sentry_options_t *); sentry_http_send_func_t send_func; void (*shutdown_client)(void *client); + bool cache_keep; + sentry_run_t *run; } http_transport_state_t; #ifdef SENTRY_TRANSPORT_COMPRESSION @@ -182,6 +185,47 @@ sentry__prepared_http_request_free(sentry_prepared_http_request_t *req) sentry_free(req); } +static int +http_send_request( + http_transport_state_t *state, sentry_prepared_http_request_t *req) +{ + sentry_http_response_t resp; + memset(&resp, 0, sizeof(resp)); + + if (!state->send_func(state->client, req, &resp)) { + sentry_free(resp.retry_after); + sentry_free(resp.x_sentry_rate_limits); + return -1; + } + + if (resp.x_sentry_rate_limits) { + sentry__rate_limiter_update_from_header( + state->ratelimiter, resp.x_sentry_rate_limits); + } else if (resp.retry_after) { + sentry__rate_limiter_update_from_http_retry_after( + state->ratelimiter, resp.retry_after); + } else if (resp.status_code == 429) { + sentry__rate_limiter_update_from_429(state->ratelimiter); + } + + sentry_free(resp.retry_after); + sentry_free(resp.x_sentry_rate_limits); + return resp.status_code; +} + +static int +http_send_envelope(http_transport_state_t *state, sentry_envelope_t *envelope) +{ + sentry_prepared_http_request_t *req = sentry__prepare_http_request( + envelope, state->dsn, state->ratelimiter, state->user_agent); + if (!req) { + return 0; + } + int status_code = http_send_request(state, req); + sentry__prepared_http_request_free(req); + return status_code; +} + static void http_transport_state_free(void *_state) { @@ -192,6 +236,7 @@ http_transport_state_free(void *_state) sentry__dsn_decref(state->dsn); sentry_free(state->user_agent); sentry__rate_limiter_free(state->ratelimiter); + sentry__run_free(state->run); sentry_free(state); } @@ -201,29 +246,10 @@ http_send_task(void *_envelope, void *_state) sentry_envelope_t *envelope = _envelope; http_transport_state_t *state = _state; - sentry_prepared_http_request_t *req = sentry__prepare_http_request( - envelope, state->dsn, state->ratelimiter, state->user_agent); - if (!req) { - return; + int status_code = http_send_envelope(state, envelope); + if (status_code < 0 && state->cache_keep) { + sentry__run_write_cache(state->run, envelope); } - - sentry_http_response_t resp; - memset(&resp, 0, sizeof(resp)); - - if (state->send_func(state->client, req, &resp)) { - if (resp.x_sentry_rate_limits) { - sentry__rate_limiter_update_from_header( - state->ratelimiter, resp.x_sentry_rate_limits); - } else if (resp.retry_after) { - sentry__rate_limiter_update_from_http_retry_after( - state->ratelimiter, resp.retry_after); - } else if (resp.status_code == 429) { - sentry__rate_limiter_update_from_429(state->ratelimiter); - } - } - sentry_free(resp.retry_after); - sentry_free(resp.x_sentry_rate_limits); - sentry__prepared_http_request_free(req); } static int @@ -236,6 +262,8 @@ http_transport_start(const sentry_options_t *options, void *transport_state) state->dsn = sentry__dsn_incref(options->dsn); state->user_agent = sentry__string_clone(options->user_agent); + state->cache_keep = options->cache_keep; + state->run = sentry__run_incref(options->run); if (state->start_client) { int rv = state->start_client(state->client, options); @@ -298,6 +326,24 @@ http_transport_get_state(sentry_transport_t *transport) return sentry__bgworker_get_state(bgworker); } +static void +http_cleanup_cache_task(void *task_data, void *_state) +{ + (void)_state; + sentry_options_t *options = task_data; + sentry__cleanup_cache(options); +} + +static void +http_transport_submit_cleanup( + const sentry_options_t *options, void *transport_state) +{ + sentry_bgworker_t *bgworker = transport_state; + sentry__bgworker_submit(bgworker, http_cleanup_cache_task, + (void (*)(void *))sentry_options_free, + sentry__options_incref((sentry_options_t *)options)); +} + sentry_transport_t * sentry__http_transport_new(void *client, sentry_http_send_func_t send_func) { @@ -331,6 +377,8 @@ sentry__http_transport_new(void *client, sentry_http_send_func_t send_func) sentry_transport_set_flush_func(transport, http_transport_flush); sentry_transport_set_shutdown_func(transport, http_transport_shutdown); sentry__transport_set_dump_func(transport, http_dump_queue); + sentry__transport_set_cleanup_func( + transport, http_transport_submit_cleanup); return transport; } diff --git a/tests/test_integration_cache.py b/tests/test_integration_cache.py index aff10fa9ff..5f58e0c62d 100644 --- a/tests/test_integration_cache.py +++ b/tests/test_integration_cache.py @@ -3,9 +3,14 @@ import pytest from . import run -from .conditions import has_breakpad, has_files +from .conditions import has_breakpad, has_files, has_http -pytestmark = pytest.mark.skipif(not has_files, reason="tests need local filesystem") +pytestmark = [ + pytest.mark.skipif(not has_files, reason="tests need local filesystem"), + pytest.mark.skipif(not has_http, reason="tests need http transport"), +] + +unreachable_dsn = "http://uiaeosnrtdy@127.0.0.1:19999/123456" @pytest.mark.parametrize("cache_keep", [True, False]) @@ -22,10 +27,9 @@ ], ) def test_cache_keep(cmake, backend, cache_keep): - tmp_path = cmake( - ["sentry_example"], {"SENTRY_BACKEND": backend, "SENTRY_TRANSPORT": "none"} - ) + tmp_path = cmake(["sentry_example"], {"SENTRY_BACKEND": backend}) cache_dir = tmp_path.joinpath(".sentry-native/cache") + env = dict(os.environ, SENTRY_DSN=unreachable_dsn) # capture run( @@ -33,6 +37,7 @@ def test_cache_keep(cmake, backend, cache_keep): "sentry_example", ["log", "crash"] + (["cache-keep"] if cache_keep else []), expect_failure=True, + env=env, ) assert not cache_dir.exists() or len(list(cache_dir.glob("*.envelope"))) == 0 @@ -42,6 +47,7 @@ def test_cache_keep(cmake, backend, cache_keep): tmp_path, "sentry_example", ["log", "no-setup"] + (["cache-keep"] if cache_keep else []), + env=env, ) assert cache_dir.exists() or cache_keep is False @@ -63,34 +69,42 @@ def test_cache_keep(cmake, backend, cache_keep): ], ) def test_cache_max_size(cmake, backend): - tmp_path = cmake( - ["sentry_example"], {"SENTRY_BACKEND": backend, "SENTRY_TRANSPORT": "none"} - ) + tmp_path = cmake(["sentry_example"], {"SENTRY_BACKEND": backend}) cache_dir = tmp_path.joinpath(".sentry-native/cache") + env = dict(os.environ, SENTRY_DSN=unreachable_dsn) - # 5 x 2mb for i in range(5): run( tmp_path, "sentry_example", ["log", "cache-keep", "crash"], expect_failure=True, + env=env, ) - if cache_dir.exists(): - cache_files = list(cache_dir.glob("*.envelope")) - for f in cache_files: - with open(f, "r+b") as file: - file.truncate(2 * 1024 * 1024) + # flush + cache + run( + tmp_path, + "sentry_example", + ["log", "cache-keep", "flush", "no-setup"], + env=env, + ) + + # 5 x 2mb + assert cache_dir.exists() + cache_files = list(cache_dir.glob("*.envelope")) + for f in cache_files: + with open(f, "r+b") as file: + file.truncate(2 * 1024 * 1024) + # max 4mb run( tmp_path, "sentry_example", ["log", "cache-keep", "no-setup"], + env=env, ) - # max 4mb - assert cache_dir.exists() cache_files = list(cache_dir.glob("*.envelope")) assert len(cache_files) <= 2 assert sum(f.stat().st_size for f in cache_files) <= 4 * 1024 * 1024 @@ -109,10 +123,9 @@ def test_cache_max_size(cmake, backend): ], ) def test_cache_max_age(cmake, backend): - tmp_path = cmake( - ["sentry_example"], {"SENTRY_BACKEND": backend, "SENTRY_TRANSPORT": "none"} - ) + tmp_path = cmake(["sentry_example"], {"SENTRY_BACKEND": backend}) cache_dir = tmp_path.joinpath(".sentry-native/cache") + env = dict(os.environ, SENTRY_DSN=unreachable_dsn) for i in range(5): run( @@ -120,8 +133,17 @@ def test_cache_max_age(cmake, backend): "sentry_example", ["log", "cache-keep", "crash"], expect_failure=True, + env=env, ) + # flush + cache + run( + tmp_path, + "sentry_example", + ["log", "cache-keep", "flush", "no-setup"], + env=env, + ) + # 2,4,6,8,10 days old assert cache_dir.exists() cache_files = list(cache_dir.glob("*.envelope")) @@ -129,16 +151,16 @@ def test_cache_max_age(cmake, backend): mtime = time.time() - ((i + 1) * 2 * 24 * 60 * 60) os.utime(str(f), (mtime, mtime)) - # 0 days old + # max 5 days run( tmp_path, "sentry_example", ["log", "cache-keep", "no-setup"], + env=env, ) - # max 5 days cache_files = list(cache_dir.glob("*.envelope")) - assert len(cache_files) == 3 + assert len(cache_files) == 2 for f in cache_files: assert time.time() - f.stat().st_mtime <= 5 * 24 * 60 * 60 @@ -156,10 +178,9 @@ def test_cache_max_age(cmake, backend): ], ) def test_cache_max_items(cmake, backend): - tmp_path = cmake( - ["sentry_example"], {"SENTRY_BACKEND": backend, "SENTRY_TRANSPORT": "none"} - ) + tmp_path = cmake(["sentry_example"], {"SENTRY_BACKEND": backend}) cache_dir = tmp_path.joinpath(".sentry-native/cache") + env = dict(os.environ, SENTRY_DSN=unreachable_dsn) for i in range(6): run( @@ -167,12 +188,15 @@ def test_cache_max_items(cmake, backend): "sentry_example", ["log", "cache-keep", "crash"], expect_failure=True, + env=env, ) + # flush + cache run( tmp_path, "sentry_example", - ["log", "cache-keep", "no-setup"], + ["log", "cache-keep", "flush", "no-setup"], + env=env, ) # max 5 items diff --git a/tests/test_unit.py b/tests/test_unit.py index c38fffb911..daa5712577 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -5,7 +5,7 @@ def test_unit(cmake, unittest): - if unittest in ["basic_transport_thread_name"]: + if unittest in ["basic_transport_thread_name", "cache_keep"]: pytest.skip("excluded from unit test-suite") cwd = cmake( ["sentry_test_unit"], @@ -33,7 +33,7 @@ def test_unit_transport(cmake, unittest): def test_unit_with_test_path(cmake, unittest): - if unittest in ["basic_transport_thread_name"]: + if unittest in ["basic_transport_thread_name", "cache_keep"]: pytest.skip("excluded from unit test-suite") cwd = cmake( ["sentry_test_unit"], diff --git a/tests/unit/test_cache.c b/tests/unit/test_cache.c index e161340bc8..760266c70e 100644 --- a/tests/unit/test_cache.c +++ b/tests/unit/test_cache.c @@ -44,6 +44,7 @@ SENTRY_TEST(cache_keep) SKIP_TEST(); #endif SENTRY_TEST_OPTIONS_NEW(options); + TEST_ASSERT(!!options->transport); sentry_options_set_dsn(options, "https://foo@sentry.invalid/42"); sentry_options_set_cache_keep(options, true); sentry_init(options); @@ -80,6 +81,7 @@ SENTRY_TEST(cache_keep) TEST_ASSERT(!sentry__path_is_file(cached_envelope_path)); sentry__process_old_runs(options, 0); + sentry_flush(5000); TEST_ASSERT(!sentry__path_is_file(old_envelope_path)); TEST_ASSERT(sentry__path_is_file(cached_envelope_path)); From 81ab489c9aaa840e98166542cc1dfe50efeaf867 Mon Sep 17 00:00:00 2001 From: J-P Nurmi Date: Wed, 18 Mar 2026 19:34:07 +0100 Subject: [PATCH 02/12] feat(native): offline caching The native daemon uses the same HTTP transport, so it gets offline caching for free as long as the cache_keep option is propagated via shared memory. Also, disable http_retry in the daemon since it is a single-shot process where retry backoff makes no sense. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 4 ++++ src/backends/native/sentry_crash_context.h | 1 + src/backends/native/sentry_crash_daemon.c | 1 + src/backends/sentry_backend_native.c | 1 + tests/assertions.py | 3 ++- tests/test_integration_native.py | 28 ++++++++++++++++++++++ 6 files changed, 37 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0da5f069a5..0935c9b1a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Unreleased +**Features**: + +- Add offline caching support to the new experimental `native` backend. ([#1585](https://github.com/getsentry/sentry-native/pull/1585)) + **Fixes**: - Fix `cache_keep` to only cache envelopes when HTTP send fails, instead of unconditionally on restart. ([#1585](https://github.com/getsentry/sentry-native/pull/1585)) diff --git a/src/backends/native/sentry_crash_context.h b/src/backends/native/sentry_crash_context.h index f8b1a27e45..d457499490 100644 --- a/src/backends/native/sentry_crash_context.h +++ b/src/backends/native/sentry_crash_context.h @@ -260,6 +260,7 @@ typedef struct { int crash_reporting_mode; // sentry_crash_reporting_mode_t bool debug_enabled; // Debug logging enabled in parent process bool attach_screenshot; // Screenshot attachment enabled in parent process + bool cache_keep; // Platform-specific crash context #if defined(SENTRY_PLATFORM_LINUX) || defined(SENTRY_PLATFORM_ANDROID) diff --git a/src/backends/native/sentry_crash_daemon.c b/src/backends/native/sentry_crash_daemon.c index d45ed7bd00..2a18942474 100644 --- a/src/backends/native/sentry_crash_daemon.c +++ b/src/backends/native/sentry_crash_daemon.c @@ -2966,6 +2966,7 @@ sentry__crash_daemon_main(pid_t app_pid, uint64_t app_tid, HANDLE event_handle, // Use debug logging and screenshot settings from parent process sentry_options_set_debug(options, ipc->shmem->debug_enabled); options->attach_screenshot = ipc->shmem->attach_screenshot; + options->cache_keep = ipc->shmem->cache_keep; // Set custom logger that writes to file if (log_file) { diff --git a/src/backends/sentry_backend_native.c b/src/backends/sentry_backend_native.c index 1f13c1c18a..d0dc8a5bd8 100644 --- a/src/backends/sentry_backend_native.c +++ b/src/backends/sentry_backend_native.c @@ -176,6 +176,7 @@ native_backend_startup( // Pass debug logging setting to daemon ctx->debug_enabled = options->debug; ctx->attach_screenshot = options->attach_screenshot; + ctx->cache_keep = options->cache_keep; // Set up event and breadcrumb paths sentry_path_t *run_path = options->run->run_path; diff --git a/tests/assertions.py b/tests/assertions.py index 8e9c1169e7..6af26af76f 100644 --- a/tests/assertions.py +++ b/tests/assertions.py @@ -559,11 +559,12 @@ def assert_failed_proxy_auth_request(stdout): def wait_for_file(path, timeout=10.0, poll_interval=0.1): + import glob import time deadline = time.time() + timeout while time.time() < deadline: - if path.exists(): + if glob.glob(str(path)): return True time.sleep(poll_interval) return False diff --git a/tests/test_integration_native.py b/tests/test_integration_native.py index 065c4b3598..f85597797f 100644 --- a/tests/test_integration_native.py +++ b/tests/test_integration_native.py @@ -19,6 +19,7 @@ from .assertions import ( assert_meta, assert_session, + wait_for_file, ) from .conditions import has_native, is_kcov, is_asan @@ -626,3 +627,30 @@ def test_crash_mode_native_with_minidump(cmake, httpserver): # Should have debug_meta assert "debug_meta" in event + + +@pytest.mark.parametrize("cache_keep", [True, False]) +def test_native_cache_keep(cmake, cache_keep): + tmp_path = cmake(["sentry_example"], {"SENTRY_BACKEND": "native"}) + db_dir = tmp_path / ".sentry-native" + cache_dir = db_dir / "cache" + unreachable_dsn = "http://uiaeosnrtdy@127.0.0.1:19999/123456" + env = dict(os.environ, SENTRY_DSN=unreachable_dsn) + + # crash -> daemon sends via HTTP -> unreachable -> cache + run_crash( + tmp_path, + "sentry_example", + ["log", "stdout", "crash"] + (["cache-keep"] if cache_keep else []), + env=env, + ) + + if cache_keep: + assert wait_for_file(cache_dir / "*.envelope") + assert len(list(cache_dir.glob("*.envelope"))) == 1 + else: + # Best-effort wait for crash processing to finish. 2s is not + # guaranteed to be enough, but we cannot poll for the non-existence + # of a file. + time.sleep(2) + assert len(list(cache_dir.glob("*.envelope"))) == 0 From 52fd7db89e8f4e970da457b1bfd4e80f02fe415c Mon Sep 17 00:00:00 2001 From: J-P Nurmi Date: Fri, 20 Mar 2026 15:11:43 +0100 Subject: [PATCH 03/12] fix formatting --- include/sentry.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/sentry.h b/include/sentry.h index 3a8281e71f..1b3d7557e0 100644 --- a/include/sentry.h +++ b/include/sentry.h @@ -1476,7 +1476,8 @@ SENTRY_API int sentry_options_get_symbolize_stacktraces( const sentry_options_t *opts); /** - * Enables or disables storing envelopes that fail to send in a persistent cache. + * Enables or disables storing envelopes that fail to send in a persistent + * cache. * * When enabled, envelopes that fail to send are written to a `cache/` * subdirectory within the database directory. The cache is cleared on startup From 37b6575705ba6e00a3f04e8d76c8b63dc04c323f Mon Sep 17 00:00:00 2001 From: J-P Nurmi Date: Fri, 20 Mar 2026 15:22:10 +0100 Subject: [PATCH 04/12] remove unused include --- src/sentry_database.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sentry_database.c b/src/sentry_database.c index a28493c86f..fa6693ccff 100644 --- a/src/sentry_database.c +++ b/src/sentry_database.c @@ -5,7 +5,6 @@ #include "sentry_options.h" #include "sentry_session.h" #include "sentry_sync.h" -#include "sentry_transport.h" #include "sentry_utils.h" #include "sentry_uuid.h" #include From c06ecfb0dcd75c6e7a02346953633e2405d067ca Mon Sep 17 00:00:00 2001 From: J-P Nurmi Date: Fri, 20 Mar 2026 16:23:31 +0100 Subject: [PATCH 05/12] flush --- tests/test_integration_cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_integration_cache.py b/tests/test_integration_cache.py index 5f58e0c62d..81741ab91c 100644 --- a/tests/test_integration_cache.py +++ b/tests/test_integration_cache.py @@ -42,11 +42,11 @@ def test_cache_keep(cmake, backend, cache_keep): assert not cache_dir.exists() or len(list(cache_dir.glob("*.envelope"))) == 0 - # cache + # flush + cache run( tmp_path, "sentry_example", - ["log", "no-setup"] + (["cache-keep"] if cache_keep else []), + ["log", "flush", "no-setup"] + (["cache-keep"] if cache_keep else []), env=env, ) From 06e319b3644a1a46808cb499363387de7174b024 Mon Sep 17 00:00:00 2001 From: J-P Nurmi Date: Tue, 24 Mar 2026 18:23:33 +0100 Subject: [PATCH 06/12] bring in on_timeout from the http-retry branch When shutdown times out, the on_timeout callback cancels the in-flight HTTP request (via curl progress callback abort or WinHTTP handle closure), unblocking the worker thread so it can finish remaining tasks like cache cleanup. Fixes test_cache_max_items failing on Windows where WinHTTP blocks for seconds on unreachable hosts. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/sentry_sync.c | 26 ++++++++++++++----- src/sentry_sync.h | 14 +++++++++- src/transports/sentry_http_transport.c | 15 ++++++++--- src/transports/sentry_http_transport_curl.c | 23 ++++++++++++++++ .../sentry_http_transport_winhttp.c | 16 +++++++----- 5 files changed, 75 insertions(+), 19 deletions(-) diff --git a/src/sentry_sync.c b/src/sentry_sync.c index c0d903f035..bf6c9a9d8d 100644 --- a/src/sentry_sync.c +++ b/src/sentry_sync.c @@ -425,7 +425,8 @@ shutdown_task(void *task_data, void *UNUSED(state)) } int -sentry__bgworker_shutdown(sentry_bgworker_t *bgw, uint64_t timeout) +sentry__bgworker_shutdown_cb(sentry_bgworker_t *bgw, uint64_t timeout, + void (*on_timeout)(void *), void *on_timeout_data) { if (!sentry__atomic_fetch(&bgw->running)) { SENTRY_WARN("trying to shut down non-running thread"); @@ -441,12 +442,23 @@ sentry__bgworker_shutdown(sentry_bgworker_t *bgw, uint64_t timeout) while (true) { uint64_t now = sentry__monotonic_time(); if (now > started && now - started > timeout) { - sentry__atomic_store(&bgw->running, 0); - sentry__thread_detach(bgw->thread_id); - sentry__mutex_unlock(&bgw->task_lock); - SENTRY_WARN( - "background thread failed to shut down cleanly within timeout"); - return 1; + if (on_timeout) { + // fire on_timeout to cancel the ongoing task, and give the + // worker an extra loop cycle up to 250ms to handle the + // cancellation + sentry__mutex_unlock(&bgw->task_lock); + on_timeout(on_timeout_data); + on_timeout = NULL; + sentry__mutex_lock(&bgw->task_lock); + // fall through to !running check below + } else { + sentry__atomic_store(&bgw->running, 0); + sentry__thread_detach(bgw->thread_id); + sentry__mutex_unlock(&bgw->task_lock); + SENTRY_WARN("background thread failed to shut down cleanly " + "within timeout"); + return 1; + } } if (!sentry__atomic_fetch(&bgw->running)) { diff --git a/src/sentry_sync.h b/src/sentry_sync.h index eab47cd591..d4f6001e7f 100644 --- a/src/sentry_sync.h +++ b/src/sentry_sync.h @@ -469,8 +469,20 @@ int sentry__bgworker_flush(sentry_bgworker_t *bgw, uint64_t timeout); /** * This will try to shut down the background worker thread, with a `timeout`. * Returns 0 on success. + * + * The `_cb` variant accepts an `on_timeout` callback that is invoked when + * the timeout expires, just before detaching the thread. This gives the + * caller a chance to unblock the worker (e.g. by closing transport handles) + * so it can finish in-flight work. */ -int sentry__bgworker_shutdown(sentry_bgworker_t *bgw, uint64_t timeout); +int sentry__bgworker_shutdown_cb(sentry_bgworker_t *bgw, uint64_t timeout, + void (*on_timeout)(void *), void *on_timeout_data); + +static inline int +sentry__bgworker_shutdown(sentry_bgworker_t *bgw, uint64_t timeout) +{ + return sentry__bgworker_shutdown_cb(bgw, timeout, NULL, NULL); +} /** * This will set a preferable thread name for background worker. diff --git a/src/transports/sentry_http_transport.c b/src/transports/sentry_http_transport.c index 91c8b2478e..70f66188f2 100644 --- a/src/transports/sentry_http_transport.c +++ b/src/transports/sentry_http_transport.c @@ -252,6 +252,15 @@ http_send_task(void *_envelope, void *_state) } } +static void +http_transport_shutdown_timeout(void *_state) +{ + http_transport_state_t *state = _state; + if (state->shutdown_client) { + state->shutdown_client(state->client); + } +} + static int http_transport_start(const sentry_options_t *options, void *transport_state) { @@ -288,10 +297,8 @@ http_transport_shutdown(uint64_t timeout, void *transport_state) sentry_bgworker_t *bgworker = transport_state; http_transport_state_t *state = sentry__bgworker_get_state(bgworker); - int rv = sentry__bgworker_shutdown(bgworker, timeout); - if (rv != 0 && state->shutdown_client) { - state->shutdown_client(state->client); - } + int rv = sentry__bgworker_shutdown_cb( + bgworker, timeout, http_transport_shutdown_timeout, state); return rv; } diff --git a/src/transports/sentry_http_transport_curl.c b/src/transports/sentry_http_transport_curl.c index b0c967f5cc..1954c8e5bf 100644 --- a/src/transports/sentry_http_transport_curl.c +++ b/src/transports/sentry_http_transport_curl.c @@ -4,6 +4,7 @@ #include "sentry_http_transport.h" #include "sentry_options.h" #include "sentry_string.h" +#include "sentry_sync.h" #include "sentry_transport.h" #include "sentry_utils.h" @@ -20,6 +21,7 @@ typedef struct { char *proxy; char *ca_certs; bool debug; + long shutdown; #ifdef SENTRY_PLATFORM_NX void *nx_state; #endif @@ -117,6 +119,22 @@ curl_client_start(void *_client, const sentry_options_t *options) return 0; } +static void +curl_client_shutdown(void *_client) +{ + curl_client_t *client = _client; + sentry__atomic_store(&client->shutdown, 1); +} + +static int +progress_callback(void *clientp, curl_off_t UNUSED(dltotal), + curl_off_t UNUSED(dlnow), curl_off_t UNUSED(ultotal), + curl_off_t UNUSED(ulnow)) +{ + curl_client_t *client = clientp; + return sentry__atomic_fetch(&client->shutdown) ? 1 : 0; +} + static size_t swallow_data( char *UNUSED(ptr), size_t size, size_t nmemb, void *UNUSED(userdata)) @@ -189,6 +207,10 @@ curl_send_task(void *_client, sentry_prepared_http_request_t *req, curl_easy_setopt(curl, CURLOPT_POSTFIELDS, req->body); curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, (long)req->body_len); curl_easy_setopt(curl, CURLOPT_USERAGENT, SENTRY_SDK_USER_AGENT); + curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT_MS, 15000L); + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); + curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, progress_callback); + curl_easy_setopt(curl, CURLOPT_XFERINFODATA, client); char error_buf[CURL_ERROR_SIZE]; error_buf[0] = 0; @@ -253,5 +275,6 @@ sentry__transport_new_default(void) } sentry__http_transport_set_free_client(transport, curl_client_free); sentry__http_transport_set_start_client(transport, curl_client_start); + sentry__http_transport_set_shutdown_client(transport, curl_client_shutdown); return transport; } diff --git a/src/transports/sentry_http_transport_winhttp.c b/src/transports/sentry_http_transport_winhttp.c index 0997d35629..f358fa6cbe 100644 --- a/src/transports/sentry_http_transport_winhttp.c +++ b/src/transports/sentry_http_transport_winhttp.c @@ -134,6 +134,9 @@ winhttp_client_start(void *_client, const sentry_options_t *opts) return 1; } + // 15s resolve/connect, 30s send/receive (WinHTTP defaults) + WinHttpSetTimeouts(client->session, 15000, 15000, 30000, 30000); + return 0; } @@ -154,9 +157,9 @@ winhttp_client_shutdown(void *_client) WinHttpCloseHandle(client->session); client->session = NULL; } - if (client->request) { - WinHttpCloseHandle(client->request); - client->request = NULL; + HINTERNET request = InterlockedExchangePointer(&client->request, NULL); + if (request) { + WinHttpCloseHandle(request); } } @@ -302,10 +305,9 @@ winhttp_send_task(void *_client, sentry_prepared_http_request_t *req, uint64_t now = sentry__monotonic_time(); SENTRY_DEBUGF("request handled in %llums", now - started); -exit: - if (client->request) { - HINTERNET request = client->request; - client->request = NULL; +exit:; + HINTERNET request = InterlockedExchangePointer(&client->request, NULL); + if (request) { WinHttpCloseHandle(request); } sentry_free(url); From b8cdf5d556f7b26e85b9d50f10814efc67d3f6a6 Mon Sep 17 00:00:00 2001 From: J-P Nurmi Date: Tue, 24 Mar 2026 21:47:37 +0100 Subject: [PATCH 07/12] fix bgworker draining queue after on_timeout Replace the boolean `running` flag with a tri-state `status` field (STOPPED, RUNNING, STOPPING) so the worker stops after its current task when a timeout occurs, leaving remaining tasks in the queue for dump_queue to save to disk. Co-Authored-By: Claude Opus 4.6 --- src/sentry_sync.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/src/sentry_sync.c b/src/sentry_sync.c index bf6c9a9d8d..eaaca852e7 100644 --- a/src/sentry_sync.c +++ b/src/sentry_sync.c @@ -157,6 +157,12 @@ sentry__task_decref(sentry_bgworker_task_t *task) } } +enum { + SENTRY_BGW_STOPPED = 0, + SENTRY_BGW_RUNNING = 1, + SENTRY_BGW_STOPPING = 2, +}; + struct sentry_bgworker_s { sentry_threadid_t thread_id; char *thread_name; @@ -169,7 +175,7 @@ struct sentry_bgworker_s { void *state; void (*free_state)(void *state); long refcount; - long running; + long status; // SENTRY_BGW_* }; sentry_bgworker_t * @@ -236,9 +242,14 @@ sentry__bgworker_get_state(sentry_bgworker_t *bgw) static bool sentry__bgworker_is_done(sentry_bgworker_t *bgw) { - return (!bgw->first_task - || sentry__monotonic_time() < bgw->first_task->execute_after) - && !sentry__atomic_fetch(&bgw->running); + long status = sentry__atomic_fetch(&bgw->status); + if (status == SENTRY_BGW_STOPPING) { + // stop immediately to leave remaining tasks in the queue + return true; + } + return status != SENTRY_BGW_RUNNING + && (!bgw->first_task + || sentry__monotonic_time() < bgw->first_task->execute_after); } SENTRY_THREAD_FN @@ -317,11 +328,11 @@ int sentry__bgworker_start(sentry_bgworker_t *bgw) { SENTRY_DEBUG("starting background worker thread"); - sentry__atomic_store(&bgw->running, 1); + sentry__atomic_store(&bgw->status, SENTRY_BGW_RUNNING); // this incref moves the reference into the background thread sentry__bgworker_incref(bgw); if (sentry__thread_spawn(&bgw->thread_id, &worker_thread, bgw) != 0) { - sentry__atomic_store(&bgw->running, 0); + sentry__atomic_store(&bgw->status, SENTRY_BGW_STOPPED); sentry__bgworker_decref(bgw); return 1; } @@ -358,7 +369,7 @@ sentry__flush_task_decref(sentry_flush_task_t *task) int sentry__bgworker_flush(sentry_bgworker_t *bgw, uint64_t timeout) { - if (!sentry__atomic_fetch(&bgw->running)) { + if (sentry__atomic_fetch(&bgw->status) != SENTRY_BGW_RUNNING) { SENTRY_WARN("trying to flush non-running thread"); return 0; } @@ -421,14 +432,14 @@ static void shutdown_task(void *task_data, void *UNUSED(state)) { sentry_bgworker_t *bgw = task_data; - sentry__atomic_store(&bgw->running, 0); + sentry__atomic_store(&bgw->status, SENTRY_BGW_STOPPED); } int sentry__bgworker_shutdown_cb(sentry_bgworker_t *bgw, uint64_t timeout, void (*on_timeout)(void *), void *on_timeout_data) { - if (!sentry__atomic_fetch(&bgw->running)) { + if (sentry__atomic_fetch(&bgw->status) != SENTRY_BGW_RUNNING) { SENTRY_WARN("trying to shut down non-running thread"); return 0; } @@ -443,16 +454,18 @@ sentry__bgworker_shutdown_cb(sentry_bgworker_t *bgw, uint64_t timeout, uint64_t now = sentry__monotonic_time(); if (now > started && now - started > timeout) { if (on_timeout) { - // fire on_timeout to cancel the ongoing task, and give the - // worker an extra loop cycle up to 250ms to handle the - // cancellation + // fire on_timeout to cancel the ongoing task, and tell + // the worker to stop so remaining tasks stay in the + // queue for dumping. the worker gets up to 250ms for + // graceful cancellation before the thread is detached. sentry__mutex_unlock(&bgw->task_lock); on_timeout(on_timeout_data); + sentry__atomic_store(&bgw->status, SENTRY_BGW_STOPPING); on_timeout = NULL; sentry__mutex_lock(&bgw->task_lock); - // fall through to !running check below + // fall through to detach on next timeout } else { - sentry__atomic_store(&bgw->running, 0); + sentry__atomic_store(&bgw->status, SENTRY_BGW_STOPPED); sentry__thread_detach(bgw->thread_id); sentry__mutex_unlock(&bgw->task_lock); SENTRY_WARN("background thread failed to shut down cleanly " @@ -461,7 +474,7 @@ sentry__bgworker_shutdown_cb(sentry_bgworker_t *bgw, uint64_t timeout, } } - if (!sentry__atomic_fetch(&bgw->running)) { + if (sentry__atomic_fetch(&bgw->status) == SENTRY_BGW_STOPPED) { sentry__mutex_unlock(&bgw->task_lock); sentry__thread_join(bgw->thread_id); return 0; From fd927f17b4a2f55d36fdd370ea37bc9b69694d91 Mon Sep 17 00:00:00 2001 From: J-P Nurmi Date: Wed, 25 Mar 2026 08:35:48 +0100 Subject: [PATCH 08/12] replace tri-state status with separate draining flag Revert the noisy STOPPED/RUNNING/STOPPING enum in favor of keeping the original running flag, and adding a separate simple draining flag. When on_timeout fires, draining is set so the worker stops after its current task, leaving remaining tasks in the queue for dump_queue. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/sentry_sync.c | 44 ++++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/src/sentry_sync.c b/src/sentry_sync.c index eaaca852e7..3c93879b5f 100644 --- a/src/sentry_sync.c +++ b/src/sentry_sync.c @@ -157,12 +157,6 @@ sentry__task_decref(sentry_bgworker_task_t *task) } } -enum { - SENTRY_BGW_STOPPED = 0, - SENTRY_BGW_RUNNING = 1, - SENTRY_BGW_STOPPING = 2, -}; - struct sentry_bgworker_s { sentry_threadid_t thread_id; char *thread_name; @@ -175,7 +169,8 @@ struct sentry_bgworker_s { void *state; void (*free_state)(void *state); long refcount; - long status; // SENTRY_BGW_* + long running; + long draining; }; sentry_bgworker_t * @@ -242,14 +237,12 @@ sentry__bgworker_get_state(sentry_bgworker_t *bgw) static bool sentry__bgworker_is_done(sentry_bgworker_t *bgw) { - long status = sentry__atomic_fetch(&bgw->status); - if (status == SENTRY_BGW_STOPPING) { - // stop immediately to leave remaining tasks in the queue + if (sentry__atomic_fetch(&bgw->draining)) { return true; } - return status != SENTRY_BGW_RUNNING - && (!bgw->first_task - || sentry__monotonic_time() < bgw->first_task->execute_after); + return (!bgw->first_task + || sentry__monotonic_time() < bgw->first_task->execute_after) + && !sentry__atomic_fetch(&bgw->running); } SENTRY_THREAD_FN @@ -328,11 +321,11 @@ int sentry__bgworker_start(sentry_bgworker_t *bgw) { SENTRY_DEBUG("starting background worker thread"); - sentry__atomic_store(&bgw->status, SENTRY_BGW_RUNNING); + sentry__atomic_store(&bgw->running, 1); // this incref moves the reference into the background thread sentry__bgworker_incref(bgw); if (sentry__thread_spawn(&bgw->thread_id, &worker_thread, bgw) != 0) { - sentry__atomic_store(&bgw->status, SENTRY_BGW_STOPPED); + sentry__atomic_store(&bgw->running, 0); sentry__bgworker_decref(bgw); return 1; } @@ -369,7 +362,7 @@ sentry__flush_task_decref(sentry_flush_task_t *task) int sentry__bgworker_flush(sentry_bgworker_t *bgw, uint64_t timeout) { - if (sentry__atomic_fetch(&bgw->status) != SENTRY_BGW_RUNNING) { + if (!sentry__atomic_fetch(&bgw->running)) { SENTRY_WARN("trying to flush non-running thread"); return 0; } @@ -432,14 +425,14 @@ static void shutdown_task(void *task_data, void *UNUSED(state)) { sentry_bgworker_t *bgw = task_data; - sentry__atomic_store(&bgw->status, SENTRY_BGW_STOPPED); + sentry__atomic_store(&bgw->running, 0); } int sentry__bgworker_shutdown_cb(sentry_bgworker_t *bgw, uint64_t timeout, void (*on_timeout)(void *), void *on_timeout_data) { - if (sentry__atomic_fetch(&bgw->status) != SENTRY_BGW_RUNNING) { + if (!sentry__atomic_fetch(&bgw->running)) { SENTRY_WARN("trying to shut down non-running thread"); return 0; } @@ -454,18 +447,17 @@ sentry__bgworker_shutdown_cb(sentry_bgworker_t *bgw, uint64_t timeout, uint64_t now = sentry__monotonic_time(); if (now > started && now - started > timeout) { if (on_timeout) { - // fire on_timeout to cancel the ongoing task, and tell - // the worker to stop so remaining tasks stay in the - // queue for dumping. the worker gets up to 250ms for - // graceful cancellation before the thread is detached. + // fire on_timeout to cancel the ongoing task, and give the + // worker an extra loop cycle up to 250ms to handle the + // cancellation sentry__mutex_unlock(&bgw->task_lock); on_timeout(on_timeout_data); - sentry__atomic_store(&bgw->status, SENTRY_BGW_STOPPING); + sentry__atomic_store(&bgw->draining, 1); on_timeout = NULL; sentry__mutex_lock(&bgw->task_lock); - // fall through to detach on next timeout + // fall through to !running check below } else { - sentry__atomic_store(&bgw->status, SENTRY_BGW_STOPPED); + sentry__atomic_store(&bgw->running, 0); sentry__thread_detach(bgw->thread_id); sentry__mutex_unlock(&bgw->task_lock); SENTRY_WARN("background thread failed to shut down cleanly " @@ -474,7 +466,7 @@ sentry__bgworker_shutdown_cb(sentry_bgworker_t *bgw, uint64_t timeout, } } - if (sentry__atomic_fetch(&bgw->status) == SENTRY_BGW_STOPPED) { + if (!sentry__atomic_fetch(&bgw->running)) { sentry__mutex_unlock(&bgw->task_lock); sentry__thread_join(bgw->thread_id); return 0; From 912acc32ff5ec208557eeda6cdcf41c8824385ed Mon Sep 17 00:00:00 2001 From: J-P Nurmi Date: Wed, 25 Mar 2026 09:17:46 +0100 Subject: [PATCH 09/12] drop orphaned cleanup task on shutdown timeout Allow NULL callback in sentry__bgworker_foreach_matching to drop tasks without invoking a callback. Use this to drop the cache cleanup task when shutdown times out, preventing a refcount underflow caused by sentry_options_free being called on an already-freeing options object. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/sentry_sync.c | 2 +- src/transports/sentry_http_transport.c | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/sentry_sync.c b/src/sentry_sync.c index 3c93879b5f..70b408688e 100644 --- a/src/sentry_sync.c +++ b/src/sentry_sync.c @@ -570,7 +570,7 @@ sentry__bgworker_foreach_matching(sentry_bgworker_t *bgw, bool drop_task = false; // only consider tasks matching this exec_func if (task->exec_func == exec_func) { - drop_task = callback(task->task_data, data); + drop_task = !callback || callback(task->task_data, data); } sentry_bgworker_task_t *next_task = task->next_task; diff --git a/src/transports/sentry_http_transport.c b/src/transports/sentry_http_transport.c index 70f66188f2..b97b9278b5 100644 --- a/src/transports/sentry_http_transport.c +++ b/src/transports/sentry_http_transport.c @@ -252,6 +252,14 @@ http_send_task(void *_envelope, void *_state) } } +static void +http_cleanup_cache_task(void *task_data, void *_state) +{ + (void)_state; + sentry_options_t *options = task_data; + sentry__cleanup_cache(options); +} + static void http_transport_shutdown_timeout(void *_state) { @@ -299,6 +307,10 @@ http_transport_shutdown(uint64_t timeout, void *transport_state) int rv = sentry__bgworker_shutdown_cb( bgworker, timeout, http_transport_shutdown_timeout, state); + if (rv != 0) { + sentry__bgworker_foreach_matching( + bgworker, http_cleanup_cache_task, NULL, NULL); + } return rv; } @@ -333,14 +345,6 @@ http_transport_get_state(sentry_transport_t *transport) return sentry__bgworker_get_state(bgworker); } -static void -http_cleanup_cache_task(void *task_data, void *_state) -{ - (void)_state; - sentry_options_t *options = task_data; - sentry__cleanup_cache(options); -} - static void http_transport_submit_cleanup( const sentry_options_t *options, void *transport_state) From af11d18c237acc48cab0ef1085c593f02cc52e77 Mon Sep 17 00:00:00 2001 From: J-P Nurmi Date: Wed, 25 Mar 2026 09:26:09 +0100 Subject: [PATCH 10/12] reset draining flag on bgworker start Co-Authored-By: Claude Opus 4.6 (1M context) --- src/sentry_sync.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sentry_sync.c b/src/sentry_sync.c index 70b408688e..38d3826be3 100644 --- a/src/sentry_sync.c +++ b/src/sentry_sync.c @@ -322,6 +322,7 @@ sentry__bgworker_start(sentry_bgworker_t *bgw) { SENTRY_DEBUG("starting background worker thread"); sentry__atomic_store(&bgw->running, 1); + sentry__atomic_store(&bgw->draining, 0); // this incref moves the reference into the background thread sentry__bgworker_incref(bgw); if (sentry__thread_spawn(&bgw->thread_id, &worker_thread, bgw) != 0) { From a5e0fa8637848cc68ae37dffdc2be293877194e6 Mon Sep 17 00:00:00 2001 From: J-P Nurmi Date: Wed, 25 Mar 2026 10:54:29 +0100 Subject: [PATCH 11/12] run cleanup task synchronously on shutdown timeout When the worker is stopped early by the draining flag, the cleanup task stays in the queue unexecuted. Execute it synchronously via foreach_matching before returning from http_transport_shutdown. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/transports/sentry_http_transport.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/transports/sentry_http_transport.c b/src/transports/sentry_http_transport.c index b97b9278b5..6449e52908 100644 --- a/src/transports/sentry_http_transport.c +++ b/src/transports/sentry_http_transport.c @@ -299,6 +299,13 @@ http_transport_flush(uint64_t timeout, void *transport_state) return sentry__bgworker_flush(bgworker, timeout); } +static bool +http_flush_cleanup_cb(void *task_data, void *UNUSED(data)) +{ + http_cleanup_cache_task(task_data, NULL); + return true; +} + static int http_transport_shutdown(uint64_t timeout, void *transport_state) { @@ -309,7 +316,7 @@ http_transport_shutdown(uint64_t timeout, void *transport_state) bgworker, timeout, http_transport_shutdown_timeout, state); if (rv != 0) { sentry__bgworker_foreach_matching( - bgworker, http_cleanup_cache_task, NULL, NULL); + bgworker, http_cleanup_cache_task, http_flush_cleanup_cb, NULL); } return rv; } From 2a037d0c11f0ee36b6e4be8817f92a8aae65a3b9 Mon Sep 17 00:00:00 2001 From: J-P Nurmi Date: Wed, 25 Mar 2026 14:53:44 +0100 Subject: [PATCH 12/12] flush bgworker before crash in cache tests Drain the bgworker queue before triggering a crash to avoid malloc deadlock between the bgworker and breakpad's minidump writer. Call graph: 872 Thread_6504096 DispatchQueue_1: com.apple.main-thread (serial) + 872 start (in dyld) + 7184 [0x18ca19d54] + 872 main (in sentry_example) + 5652 [0x1002464a8] example.c:961 + 872 trigger_crash (in sentry_example) + 32 [0x10024762c] example.c:373 + 872 _platform_memset (in libsystem_platform.dylib) + 108 [0x18cded11c] 872 Thread_6504097 + 872 start_wqthread (in libsystem_pthread.dylib) + 8 [0x18cddeb9c] + 872 _pthread_wqthread (in libsystem_pthread.dylib) + 368 [0x18cddfe98] + 872 __workq_kernreturn (in libsystem_kernel.dylib) + 8 [0x18cda29dc] 872 Thread_6504098 + 872 start_wqthread (in libsystem_pthread.dylib) + 0 [0x18cddeb94] 872 Thread_6504099: sentry-http + 872 thread_start (in libsystem_pthread.dylib) + 8 [0x18cddeba8] + 872 _pthread_start (in libsystem_pthread.dylib) + 136 [0x18cde3c08] + 872 worker_thread (in libsentry.dylib) + 756 [0x1002e863c] sentry_sync.c:295 + 872 sentry__cleanup_cache (in libsentry.dylib) + 128 [0x1002deb18] sentry_database.c:392 + 872 sentry__pathiter_next (in libsentry.dylib) + 112 [0x1002f3f6c] sentry_path_unix.c:403 + 872 sentry__path_join_str (in libsentry.dylib) + 420 [0x1002f3cbc] sentry_path_unix.c:282 + 872 sentry__path_from_str_owned (in libsentry.dylib) + 24 [0x1002f26b8] sentry_path.c:24 + 872 mfm_alloc (in libsystem_malloc.dylib) + 416 [0x18cbf7f90] 872 Thread_6504100 872 thread_start (in libsystem_pthread.dylib) + 8 [0x18cddeba8] 872 _pthread_start (in libsystem_pthread.dylib) + 136 [0x18cde3c08] 872 google_breakpad::ExceptionHandler::WaitForMessage(void*) (in libsentry.dylib) + 264 [0x100301304] exception_handler.cc:606 872 google_breakpad::ExceptionHandler::WriteMinidumpWithException(int, int, int, __darwin_ucontext*, unsigned int, bool, bool) (in libsentry.dylib) + 452 [0x100301190] exception_handler.cc:434 872 google_breakpad::MinidumpGenerator::Write(char const*) (in libsentry.dylib) + 332 [0x1002fdad0] minidump_generator.cc:326 872 google_breakpad::MinidumpGenerator::WriteModuleListStream(MDRawDirectory*) (in libsentry.dylib) + 280 [0x1002fe474] minidump_generator.cc:1574 872 google_breakpad::MinidumpGenerator::WriteModuleStream(unsigned int, MDRawModule*) (in libsentry.dylib) + 360 [0x1002ff580] minidump_generator.cc:1451 872 google_breakpad::MinidumpGenerator::WriteCVRecord(MDRawModule*, int, char const*, bool) (in libsentry.dylib) + 360 [0x1002ff8dc] minidump_generator.cc:1528 872 google_breakpad::mach_o::FileID::MachoIdentifier(int, int, unsigned char*) (in libsentry.dylib) + 88 [0x1002f81ec] file_id.cc:69 872 operator new(unsigned long) (in libc++abi.dylib) + 52 [0x18cd9ba78] 872 mfm_alloc (in libsystem_malloc.dylib) + 108 [0x18cbf7e5c] 872 _os_unfair_lock_lock_slow (in libsystem_platform.dylib) + 176 [0x18cdeb3e4] 872 __ulock_wait2 (in libsystem_kernel.dylib) + 8 [0x18cdaec74] Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_integration_cache.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_integration_cache.py b/tests/test_integration_cache.py index 81741ab91c..0669f7c680 100644 --- a/tests/test_integration_cache.py +++ b/tests/test_integration_cache.py @@ -35,7 +35,7 @@ def test_cache_keep(cmake, backend, cache_keep): run( tmp_path, "sentry_example", - ["log", "crash"] + (["cache-keep"] if cache_keep else []), + ["log", "flush", "crash"] + (["cache-keep"] if cache_keep else []), expect_failure=True, env=env, ) @@ -77,7 +77,7 @@ def test_cache_max_size(cmake, backend): run( tmp_path, "sentry_example", - ["log", "cache-keep", "crash"], + ["log", "cache-keep", "flush", "crash"], expect_failure=True, env=env, ) @@ -131,7 +131,7 @@ def test_cache_max_age(cmake, backend): run( tmp_path, "sentry_example", - ["log", "cache-keep", "crash"], + ["log", "cache-keep", "flush", "crash"], expect_failure=True, env=env, ) @@ -186,7 +186,7 @@ def test_cache_max_items(cmake, backend): run( tmp_path, "sentry_example", - ["log", "cache-keep", "crash"], + ["log", "cache-keep", "flush", "crash"], expect_failure=True, env=env, )