From 3708a6ddcd020b01a5226be3cad5aa57b7661ac4 Mon Sep 17 00:00:00 2001
From: Rob Court <robbie.court@gmail.com>
Date: Wed, 3 Jun 2026 09:01:35 +0100
Subject: [PATCH] Add --force-refresh flag to overwrite owl_cache canonical
 slots

The existing pre-warm pass walks every (query_type, entity_id) tuple
for the OWLERY + V3 endpoints and runs a GET against owl_cache
(v3-cached.virtualflybrain.org for V3, owl.virtualflybrain.org for
the legacy OWLERY queries). That's enough on first deploy when the
cache is cold, but it does nothing for a refresh: the very first
request after the warmup is served from the existing cache slot.

After a VFBquery release the deployed response shape may differ
(new columns, fixed wire format, etc.). We need a way to force-fetch
every entry from upstream and overwrite the canonical owl_cache
slot, in the background, so end-users hitting v3-cached after the
release get either a stale (previous-version) entry or a fresh
(post-warmup) entry -- never a cold miss.

owl_cache's companion change adds:

    map $http_x_force_refresh $force_refresh {
        default ${FORCE_CACHE_REFRESH_ON_REQUEST};
        "~*^(true|1|yes)$" 1;
    }

with the pre-existing `proxy_cache_bypass $force_refresh` and no
`proxy_no_cache` -- so a request with `X-Force-Refresh: true`
bypasses the cache for the fetch AND the upstream response then
overwrites the canonical slot.

This change adds the matching CLI flag. With `--force-refresh`,
every request sent through this tool gets `X-Force-Refresh: true`.
Without the flag, behaviour is unchanged (cold-cache pre-warm).

Mechanical changes:

- `run_query` and `run_query_type` now accept an optional `headers`
  kwarg threaded through to the per-thread `requests.Session`'s
  `.get(...)` call.
- `--force-refresh` argparse flag (default off).
- `request_headers` is built once in `main` -- `{'X-Force-Refresh':
  'true'}` when `--force-refresh` is given, otherwise `None` -- and
  passed to every executor submit.

Rollout order:
  1. owl_cache header-based-force-refresh branch -> merge -> redeploy.
  2. This owlery-cache-reload PR -> merge -> redeploy.
  3. Jenkins job: `python main.py --force-refresh` after each
     tagged VFBquery push.
---
 main.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/main.py b/main.py
index 71bce5a..9fc9095 100644
--- a/main.py
+++ b/main.py
@@ -12,10 +12,10 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from vfb_connect import vfb
 
-def run_query_type(name, url_template, ids, timeout, parallel, counter, counter_lock, total_queries):
+def run_query_type(name, url_template, ids, timeout, parallel, counter, counter_lock, total_queries, headers=None):
     """Run all IDs for a single query type in its own thread pool."""
     with ThreadPoolExecutor(max_workers=parallel) as executor:
-        futures = {executor.submit(run_query, name, url_template, id, timeout): id for id in ids}
+        futures = {executor.submit(run_query, name, url_template, id, timeout, headers): id for id in ids}
         for future in as_completed(futures):
             result = future.result()
             with counter_lock:
@@ -30,7 +30,7 @@ def _get_session():
         _thread_local.session = requests.Session()
     return _thread_local.session
 
-def run_query(name, url_template, id, timeout=60):
+def run_query(name, url_template, id, timeout=60, headers=None):
     if id is None:
         query_url = url_template
         id_label = "(global)"
@@ -39,7 +39,7 @@ def run_query(name, url_template, id, timeout=60):
         id_label = id
 
     try:
-        response = _get_session().get(query_url, timeout=timeout)
+        response = _get_session().get(query_url, timeout=timeout, headers=headers)
         if response.status_code == 200:
             return f"✓ {name} for {id_label}"
         else:
@@ -126,7 +126,17 @@ def main():
     parser.add_argument('--max-ids', type=int, default=None, help='Maximum number of IDs to test per query (for testing).')
     parser.add_argument('--timeout', type=int, default=9000, help='Timeout in seconds for each query request.')
     parser.add_argument('--parallel', type=int, default=50, help='Number of parallel requests to run at once.')
+    parser.add_argument(
+        '--force-refresh', action='store_true',
+        help='Send X-Force-Refresh: true on every request. owl_cache (v3-cached) '
+             'bypasses its cache for the request and overwrites the canonical slot '
+             'with the fresh upstream response. Use after a VFBquery release to '
+             'pre-warm the cache so end-users never see a cold miss.',
+    )
     args = parser.parse_args()
+    request_headers = {'X-Force-Refresh': 'true'} if args.force_refresh else None
+    if args.force_refresh:
+        print("force-refresh mode: X-Force-Refresh: true on every request")
 
     # Connect to VFB
     print("Connecting to VFB...")
@@ -184,7 +194,8 @@ def main():
         futures = [
             query_type_executor.submit(
                 run_query_type, name, url_template, ids,
-                args.timeout, args.parallel, counter, counter_lock, total_queries
+                args.timeout, args.parallel, counter, counter_lock, total_queries,
+                request_headers,
             )
             for name, url_template, ids in query_jobs
         ]