From 3708a6ddcd020b01a5226be3cad5aa57b7661ac4 Mon Sep 17 00:00:00 2001 From: Rob Court Date: Wed, 3 Jun 2026 09:01:35 +0100 Subject: [PATCH] Add --force-refresh flag to overwrite owl_cache canonical slots The existing pre-warm pass walks every (query_type, entity_id) tuple for the OWLERY + V3 endpoints and runs a GET against owl_cache (v3-cached.virtualflybrain.org for V3, owl.virtualflybrain.org for the legacy OWLERY queries). That's enough on first deploy when the cache is cold, but it does nothing for a refresh: the very first request after the warmup is served from the existing cache slot. After a VFBquery release the deployed response shape may differ (new columns, fixed wire format, etc.). We need a way to force-fetch every entry from upstream and overwrite the canonical owl_cache slot, in the background, so end-users hitting v3-cached after the release get either a stale (previous-version) entry or a fresh (post-warmup) entry -- never a cold miss. owl_cache's companion change adds: map $http_x_force_refresh $force_refresh { default ${FORCE_CACHE_REFRESH_ON_REQUEST}; "~*^(true|1|yes)$" 1; } with the pre-existing `proxy_cache_bypass $force_refresh` and no `proxy_no_cache` -- so a request with `X-Force-Refresh: true` bypasses the cache for the fetch AND the upstream response then overwrites the canonical slot. This change adds the matching CLI flag. With `--force-refresh`, every request sent through this tool gets `X-Force-Refresh: true`. Without the flag, behaviour is unchanged (cold-cache pre-warm). Mechanical changes: - `run_query` and `run_query_type` now accept an optional `headers` kwarg threaded through to `requests.get`. - `--force-refresh` argparse flag (default off). - `request_headers = {'X-Force-Refresh': 'true'}` built once in `main` and passed to every executor submit. Rollout order: 1. owl_cache header-based-force-refresh branch -> merge -> redeploy. 2. This owlery-cache-reload PR -> merge -> redeploy. 3. 
Jenkins job: `python main.py --force-refresh` after each tagged VFBquery push. --- main.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/main.py b/main.py index 71bce5a..9fc9095 100644 --- a/main.py +++ b/main.py @@ -12,10 +12,10 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from vfb_connect import vfb -def run_query_type(name, url_template, ids, timeout, parallel, counter, counter_lock, total_queries): +def run_query_type(name, url_template, ids, timeout, parallel, counter, counter_lock, total_queries, headers=None): """Run all IDs for a single query type in its own thread pool.""" with ThreadPoolExecutor(max_workers=parallel) as executor: - futures = {executor.submit(run_query, name, url_template, id, timeout): id for id in ids} + futures = {executor.submit(run_query, name, url_template, id, timeout, headers): id for id in ids} for future in as_completed(futures): result = future.result() with counter_lock: @@ -30,7 +30,7 @@ def _get_session(): _thread_local.session = requests.Session() return _thread_local.session -def run_query(name, url_template, id, timeout=60): +def run_query(name, url_template, id, timeout=60, headers=None): if id is None: query_url = url_template id_label = "(global)" @@ -39,7 +39,7 @@ def run_query(name, url_template, id, timeout=60): id_label = id try: - response = _get_session().get(query_url, timeout=timeout) + response = _get_session().get(query_url, timeout=timeout, headers=headers) if response.status_code == 200: return f"✓ {name} for {id_label}" else: @@ -126,7 +126,17 @@ def main(): parser.add_argument('--max-ids', type=int, default=None, help='Maximum number of IDs to test per query (for testing).') parser.add_argument('--timeout', type=int, default=9000, help='Timeout in seconds for each query request.') parser.add_argument('--parallel', type=int, default=50, help='Number of parallel requests to run at once.') + parser.add_argument( + '--force-refresh', 
action='store_true', + help='Send X-Force-Refresh: true on every request. owl_cache (v3-cached) ' + 'bypasses its cache for the request and overwrites the canonical slot ' + 'with the fresh upstream response. Use after a VFBquery release to ' + 'pre-warm the cache so end-users never see a cold miss.', + ) args = parser.parse_args() + request_headers = {'X-Force-Refresh': 'true'} if args.force_refresh else None + if args.force_refresh: + print("force-refresh mode: X-Force-Refresh: true on every request") # Connect to VFB print("Connecting to VFB...") @@ -184,7 +194,8 @@ def main(): futures = [ query_type_executor.submit( run_query_type, name, url_template, ids, - args.timeout, args.parallel, counter, counter_lock, total_queries + args.timeout, args.parallel, counter, counter_lock, total_queries, + request_headers, ) for name, url_template, ids in query_jobs ]