From 5a83e11d43b3dcda84448600bf2406c35df5ca1d Mon Sep 17 00:00:00 2001 From: Corey Woodfield Date: Tue, 10 Jun 2025 17:18:36 -0600 Subject: [PATCH] Ignore interrupts if there's something to read Worker threads that are waiting for a work response get interrupted if they are still going after the execution has completed (See RedisShardBackplane.pollExecution and ShardWorkerContext.resumePoller and Executor.runInterruptible). This is happening to us often. Assuming the problem is in buildfarm, and not our rules, it may be that the execution writes its entire response and completes within the span of the Thread.sleep(10) call, and then the poller sees that the execution is complete and interrupts the thread before it gets the chance to check the output stream again. This seems a bit far-fetched, but it's the only explanation I came up with. If that is what is actually happening, we can avoid the failure by not propagating the InterruptedException when a response is already available: the thread's interrupt flag is restored and waitForInput returns so that the response can be read. --- .../persistent/bazel/processes/ProtoWorkerRW.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/persistentworkers/src/main/java/persistent/bazel/processes/ProtoWorkerRW.java b/persistentworkers/src/main/java/persistent/bazel/processes/ProtoWorkerRW.java index 0663156626..a521aa3d30 100644 --- a/persistentworkers/src/main/java/persistent/bazel/processes/ProtoWorkerRW.java +++ b/persistentworkers/src/main/java/persistent/bazel/processes/ProtoWorkerRW.java @@ -85,7 +85,18 @@ public static void waitForInput(Supplier liveCheck, InputStream inputSt String workerDeathMsg = "Worker process for died while waiting for response"; // TODO can we do better than spinning? i.e. condition variable? 
while (inputAvailable(inputStream, workerDeathMsg) == 0) { - Thread.sleep(10); + try { + Thread.sleep(10); + } catch (InterruptedException e) { + if (inputAvailable(inputStream, workerDeathMsg) > 0) { + // we were interrupted because the task is complete and we're still spinning: if there's a work response + // available, read it and ignore the interrupt + Thread.currentThread().interrupt(); + return; + } else { + throw e; + } + } if (!liveCheck.get()) { throw new IOException(workerDeathMsg + "\n"); }