-
Notifications
You must be signed in to change notification settings - Fork 161
fix: prevent agent loop stall on write_to_file filesystem errors (#703) #727
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -1730,6 +1730,21 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike { | |||||||||||||||||||||||||||||||
| return formatResponse.toolError(formatResponse.missingToolParameterError(paramName)) | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||||||
| * Finalize the last partial "tool" ask message without blocking for user input. | ||||||||||||||||||||||||||||||||
| * Call this in error paths where a partial tool message was opened during streaming | ||||||||||||||||||||||||||||||||
| * but execution failed before the normal approval flow could close it, so the webview | ||||||||||||||||||||||||||||||||
| * spinner does not get stuck in a loading state. | ||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||
| async finalizePartialToolAsk(): Promise<void> { | ||||||||||||||||||||||||||||||||
| const lastMessage = this.clineMessages.at(-1) | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| if (lastMessage && lastMessage.partial && lastMessage.type === "ask" && lastMessage.ask === "tool") { | ||||||||||||||||||||||||||||||||
| lastMessage.partial = false | ||||||||||||||||||||||||||||||||
| await this.updateClineMessage(lastMessage) | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
Comment on lines
+1739
to
+1745
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗄️ Data Integrity & Integration | 🟡 Minor | ⚡ Quick win Persist the finalized tool-ask state. Line 1744 only calls Suggested fix async finalizePartialToolAsk(): Promise<void> {
const lastMessage = this.clineMessages.at(-1)
if (lastMessage && lastMessage.partial && lastMessage.type === "ask" && lastMessage.ask === "tool") {
lastMessage.partial = false
+ await this.saveClineMessages()
await this.updateClineMessage(lastMessage)
}
}📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| // Lifecycle | ||||||||||||||||||||||||||||||||
| // Start / Resume / Abort / Dispose | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,19 @@ interface WriteToFileParams { | |
| export class WriteToFileTool extends BaseTool<"write_to_file"> { | ||
| readonly name = "write_to_file" as const | ||
|
|
||
| /** | ||
| * Set when a filesystem error aborts diff-view streaming during handlePartial for the | ||
| * current tool invocation. Subsequent streaming deltas for the same block then skip the | ||
| * doomed open()/update() retry, which would otherwise create a fresh "Zoo wants to edit | ||
| * this file" message on every delta. Cleared by resetPartialState() between invocations. | ||
| */ | ||
| private partialStreamFailed = false | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| override resetPartialState(): void { | ||
| super.resetPartialState() | ||
| this.partialStreamFailed = false | ||
| } | ||
|
|
||
| async execute(params: WriteToFileParams, task: Task, callbacks: ToolCallbacks): Promise<void> { | ||
| const { pushToolResult, handleError, askApproval } = callbacks | ||
| const relPath = params.path | ||
|
|
@@ -67,12 +80,6 @@ export class WriteToFileTool extends BaseTool<"write_to_file"> { | |
| task.diffViewProvider.editType = fileExists ? "modify" : "create" | ||
| } | ||
|
|
||
| // Create parent directories early for new files to prevent ENOENT errors | ||
| // in subsequent operations (e.g., diffViewProvider.open, fs.readFile) | ||
| if (!fileExists) { | ||
| await createDirectoriesForFile(absolutePath) | ||
| } | ||
|
|
||
| if (newContent.startsWith("```")) { | ||
| newContent = newContent.split("\n").slice(1).join("\n") | ||
| } | ||
|
|
@@ -99,6 +106,13 @@ export class WriteToFileTool extends BaseTool<"write_to_file"> { | |
| try { | ||
| task.consecutiveMistakeCount = 0 | ||
|
|
||
| // Create parent directories for new files inside the try block so filesystem | ||
| // errors (EROFS, EACCES, etc.) route through handleError with proper cleanup | ||
| // and consecutive-mistake counting, rather than escaping unhandled. | ||
| if (!fileExists) { | ||
| await createDirectoriesForFile(absolutePath) | ||
|
Comment on lines
107
to
+113
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The mistake counter is zeroed on line 107 before the call that can throw on line 113. When |
||
| } | ||
|
|
||
| const provider = task.providerRef.deref() | ||
| const state = await provider?.getState() | ||
| const diagnosticsEnabled = state?.diagnosticsEnabled ?? true | ||
|
|
@@ -186,6 +200,11 @@ export class WriteToFileTool extends BaseTool<"write_to_file"> { | |
|
|
||
| return | ||
| } catch (error) { | ||
| // Finalize any open partial tool message so the UI spinner doesn't get stuck. | ||
| // The partial ask fired during streaming (handlePartial) or early in execute sets | ||
| // partial: true on the webview message; without this, the spinner persists even | ||
| // after the error bubble appears. | ||
| await task.finalizePartialToolAsk() | ||
| await handleError("writing file", error as Error) | ||
| await task.diffViewProvider.reset() | ||
| this.resetPartialState() | ||
|
|
@@ -197,6 +216,13 @@ export class WriteToFileTool extends BaseTool<"write_to_file"> { | |
| const relPath: string | undefined = block.params.path | ||
| let newContent: string | undefined = block.params.content | ||
|
|
||
| // A prior streaming delta for this invocation already hit a fatal filesystem error. | ||
| // Skip further streaming work so we don't create a new partial tool message on every | ||
| // subsequent delta. execute() will report the error once when the block completes. | ||
| if (this.partialStreamFailed) { | ||
| return | ||
| } | ||
|
|
||
| // Wait for path to stabilize before showing UI (prevents truncated paths) | ||
| if (!this.hasPathStabilized(relPath) || newContent === undefined) { | ||
| return | ||
|
|
@@ -224,12 +250,6 @@ export class WriteToFileTool extends BaseTool<"write_to_file"> { | |
| task.diffViewProvider.editType = fileExists ? "modify" : "create" | ||
| } | ||
|
|
||
| // Create parent directories early for new files to prevent ENOENT errors | ||
| // in subsequent operations (e.g., diffViewProvider.open) | ||
| if (!fileExists) { | ||
| await createDirectoriesForFile(absolutePath) | ||
| } | ||
|
|
||
| const isWriteProtected = task.rooProtectedController?.isWriteProtected(relPath!) || false | ||
| const isOutsideWorkspace = isPathOutsideWorkspace(absolutePath) | ||
|
|
||
|
|
@@ -245,14 +265,31 @@ export class WriteToFileTool extends BaseTool<"write_to_file"> { | |
| await task.ask("tool", partialMessage, block.partial).catch(() => {}) | ||
|
|
||
| if (newContent) { | ||
| if (!task.diffViewProvider.isEditing) { | ||
| await task.diffViewProvider.open(relPath!) | ||
| } | ||
| try { | ||
| if (!task.diffViewProvider.isEditing) { | ||
| await task.diffViewProvider.open(relPath!) | ||
| } | ||
|
|
||
| await task.diffViewProvider.update( | ||
| everyLineHasLineNumbers(newContent) ? stripLineNumbers(newContent) : newContent, | ||
| false, | ||
| ) | ||
| await task.diffViewProvider.update( | ||
| everyLineHasLineNumbers(newContent) ? stripLineNumbers(newContent) : newContent, | ||
| false, | ||
| ) | ||
| } catch (error) { | ||
| // Opening or updating the diff view can throw on filesystem errors | ||
| // (EACCES/EROFS on read-only paths). Finalize the partial tool message | ||
| // so the UI spinner doesn't get stuck and reset the diff view. Do NOT | ||
| // rethrow: the same filesystem operation is retried in execute() once the | ||
| // block completes, and that authoritative non-partial path reports the | ||
| // error to the user. Surfacing it here too would show the same error twice. | ||
| // Swallowing it here is safe because the agent loop advances naturally when | ||
| // the non-partial block arrives (it does not depend on this throw). | ||
| console.error(`Error streaming write_to_file diff view:`, error) | ||
| // Mark the stream as failed so later deltas don't re-attempt and spawn a new | ||
| // partial tool message each time. | ||
| this.partialStreamFailed = true | ||
| await task.finalizePartialToolAsk() | ||
| await task.diffViewProvider.reset() | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -178,6 +178,7 @@ describe("writeToFileTool", () => { | |
| } | ||
| mockCline.say = vi.fn().mockResolvedValue(undefined) | ||
| mockCline.ask = vi.fn().mockResolvedValue(undefined) | ||
| mockCline.finalizePartialToolAsk = vi.fn().mockResolvedValue(undefined) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The mock replaces the full implementation, so these tests verify the method is called but never that |
||
| mockCline.recordToolError = vi.fn() | ||
| mockCline.sayAndCreateMissingParamError = vi.fn().mockResolvedValue("Missing param error") | ||
|
|
||
|
|
@@ -279,15 +280,16 @@ describe("writeToFileTool", () => { | |
| ) | ||
|
|
||
| it.skipIf(process.platform === "win32")( | ||
| "creates parent directories when path has stabilized (partial)", | ||
| "does not create directories in handlePartial -- only execute() creates them", | ||
| async () => { | ||
| // First call - path not yet stabilized | ||
| // First call - path not yet stabilized, early return | ||
| await executeWriteFileTool({}, { fileExists: false, isPartial: true }) | ||
| expect(mockedCreateDirectoriesForFile).not.toHaveBeenCalled() | ||
|
|
||
| // Second call with same path - path is now stabilized | ||
| // Second call with same path - path stabilized, handlePartial runs but | ||
| // must NOT call createDirectoriesForFile (directory creation belongs in execute) | ||
| await executeWriteFileTool({}, { fileExists: false, isPartial: true }) | ||
| expect(mockedCreateDirectoriesForFile).toHaveBeenCalledWith(absoluteFilePath) | ||
| expect(mockedCreateDirectoriesForFile).not.toHaveBeenCalled() | ||
| }, | ||
| ) | ||
|
|
||
|
|
@@ -452,16 +454,171 @@ describe("writeToFileTool", () => { | |
| expect(mockCline.diffViewProvider.reset).toHaveBeenCalled() | ||
| }) | ||
|
|
||
| it("handles partial streaming errors after path stabilizes", async () => { | ||
| it("swallows partial streaming errors instead of surfacing a duplicate error bubble", async () => { | ||
| // The same filesystem operation is retried in execute() once the block completes, | ||
| // and that authoritative non-partial path reports the error to the user. Surfacing | ||
| // it during streaming too would show the same error twice, so handlePartial must NOT | ||
| // route streaming errors through handleError. | ||
| mockCline.diffViewProvider.open.mockRejectedValue(new Error("Open failed")) | ||
|
|
||
| // First call - path not yet stabilized, no error yet | ||
| await executeWriteFileTool({}, { isPartial: true }) | ||
| expect(mockHandleError).not.toHaveBeenCalled() | ||
|
|
||
| // Second call with same path - path is now stabilized, error occurs | ||
| // Second call with same path - path is now stabilized, error occurs but is swallowed | ||
| await executeWriteFileTool({}, { isPartial: true }) | ||
| expect(mockHandleError).not.toHaveBeenCalled() | ||
| }) | ||
|
|
||
| it("finalizes partial tool message and resets diff view when handlePartial open() fails", async () => { | ||
| // Regression test: when diffViewProvider.open() throws during streaming (e.g. EACCES/EROFS | ||
| // on a read-only path), the partial tool ask created at the top of handlePartial leaves the | ||
| // UI spinner stuck. handlePartial must finalize the partial message and reset the diff view, | ||
| // and must NOT surface a duplicate error (execute() reports the authoritative one). | ||
| mockCline.diffViewProvider.open.mockRejectedValue( | ||
| Object.assign(new Error("EACCES: permission denied, open '/ro/test.py'"), { code: "EACCES" }), | ||
| ) | ||
|
|
||
| // First call - path not yet stabilized | ||
| await executeWriteFileTool({}, { isPartial: true }) | ||
| expect(mockHandleError).toHaveBeenCalledWith("handling partial write_to_file", expect.any(Error)) | ||
| expect(mockCline.finalizePartialToolAsk).not.toHaveBeenCalled() | ||
|
|
||
| // Second call - path stabilized, open() rejects | ||
| await executeWriteFileTool({}, { isPartial: true }) | ||
|
|
||
| expect(mockCline.finalizePartialToolAsk).toHaveBeenCalled() | ||
| expect(mockCline.diffViewProvider.reset).toHaveBeenCalled() | ||
| expect(mockHandleError).not.toHaveBeenCalled() | ||
| }) | ||
|
|
||
| it("finalizes partial tool message and resets diff view when handlePartial update() fails", async () => { | ||
| // Same regression as above but for the streaming update() call failing after open() succeeds. | ||
| mockCline.diffViewProvider.update.mockRejectedValue( | ||
| Object.assign(new Error("EROFS: read-only file system, write '/ro/test.py'"), { code: "EROFS" }), | ||
| ) | ||
|
|
||
| // First call - path not yet stabilized | ||
| await executeWriteFileTool({}, { isPartial: true }) | ||
|
|
||
| // Second call - path stabilized, update() rejects | ||
| await executeWriteFileTool({}, { isPartial: true }) | ||
|
|
||
| expect(mockCline.finalizePartialToolAsk).toHaveBeenCalled() | ||
| expect(mockCline.diffViewProvider.reset).toHaveBeenCalled() | ||
| expect(mockHandleError).not.toHaveBeenCalled() | ||
| }) | ||
|
|
||
| it("does not spawn a new partial tool message on each streaming delta after a failure", async () => { | ||
| // Regression test: after diffViewProvider.open() throws and the partial message is | ||
| // finalized + diff view reset, the next streaming delta saw a non-partial last message | ||
| // and created a brand new "Zoo wants to edit this file" message -- repeating once per | ||
| // delta. After the fix, partialStreamFailed short-circuits subsequent deltas so only | ||
| // the single initial partial ask is issued. | ||
| mockCline.diffViewProvider.open.mockRejectedValue( | ||
| Object.assign(new Error("EROFS: read-only file system, mkdir '/scratch'"), { code: "EROFS" }), | ||
| ) | ||
|
|
||
| // Delta 1 - stabilize path (no ask yet) | ||
| await executeWriteFileTool({}, { fileExists: false, isPartial: true }) | ||
| // Delta 2 - path stabilized, ask issued once, open() fails, stream marked failed | ||
| await executeWriteFileTool({}, { fileExists: false, isPartial: true }) | ||
| // Deltas 3..5 - must be short-circuited, no further asks | ||
| await executeWriteFileTool({}, { fileExists: false, isPartial: true }) | ||
| await executeWriteFileTool({}, { fileExists: false, isPartial: true }) | ||
| await executeWriteFileTool({}, { fileExists: false, isPartial: true }) | ||
|
|
||
| // Only the single partial ask from delta 2 should have been issued | ||
| expect(mockCline.ask).toHaveBeenCalledTimes(1) | ||
| // open() must not be retried after the first failure | ||
| expect(mockCline.diffViewProvider.open).toHaveBeenCalledTimes(1) | ||
| }) | ||
|
|
||
| it("reports a filesystem error only once across the streaming and execute phases", async () => { | ||
| // Regression test for the double-error UX defect: a single write_to_file call to a | ||
| // read-only path failed twice -- once in handlePartial ("handling partial write_to_file") | ||
| // and once in execute() ("writing file"). handlePartial now swallows its error so only | ||
| // the authoritative execute() error is surfaced. | ||
| const erofs = () => | ||
| Object.assign(new Error("EROFS: read-only file system, mkdir '/scratch'"), { code: "EROFS" }) | ||
| mockCline.diffViewProvider.open.mockRejectedValue(erofs()) | ||
| mockedCreateDirectoriesForFile.mockRejectedValue(erofs()) | ||
|
|
||
| // Streaming phase: stabilize path then fail (swallowed, no handleError) | ||
| await executeWriteFileTool({}, { fileExists: false, isPartial: true }) | ||
| await executeWriteFileTool({}, { fileExists: false, isPartial: true }) | ||
|
|
||
| // Final phase: execute() reports the single authoritative error | ||
| await executeWriteFileTool({}, { fileExists: false }) | ||
|
|
||
| expect(mockHandleError).toHaveBeenCalledTimes(1) | ||
| expect(mockHandleError).toHaveBeenCalledWith("writing file", expect.any(Error)) | ||
| }) | ||
|
|
||
| it.skipIf(process.platform === "win32")( | ||
| "EROFS in handlePartial does not stall agent loop -- createDirectoriesForFile is not called", | ||
| async () => { | ||
| // Regression test: before the fix, createDirectoriesForFile was called in handlePartial | ||
| // with no .catch() guard. An EROFS throw escaped to BaseTool.handle(), which called | ||
| // handleError but did not set didRejectTool/didAlreadyUseTool, so the advancement gate | ||
| // in presentAssistantMessage was never reached and the agent loop stalled permanently. | ||
| // After the fix the call is removed entirely -- handlePartial never touches the filesystem. | ||
| mockedCreateDirectoriesForFile.mockRejectedValue( | ||
| Object.assign(new Error("EROFS: read-only file system, mkdir '/scratch'"), { code: "EROFS" }), | ||
| ) | ||
|
|
||
| // First call -- path not yet stabilized, returns early | ||
| await executeWriteFileTool({}, { fileExists: false, isPartial: true }) | ||
| expect(mockHandleError).not.toHaveBeenCalled() | ||
|
|
||
| // Second call -- path stabilized; createDirectoriesForFile must NOT be called from | ||
| // handlePartial, so the mock rejection must not trigger and handleError must not be called | ||
| await executeWriteFileTool({}, { fileExists: false, isPartial: true }) | ||
| expect(mockedCreateDirectoriesForFile).not.toHaveBeenCalled() | ||
| expect(mockHandleError).not.toHaveBeenCalled() | ||
| }, | ||
| ) | ||
|
|
||
| it.skipIf(process.platform === "win32")( | ||
| "EROFS in execute() routes through handleError with cleanup rather than escaping unhandled", | ||
| async () => { | ||
| // Regression test: before the fix, createDirectoriesForFile in execute() sat outside | ||
| // the try block (lines 70-74), so an EROFS error escaped the catch at line 188 entirely. | ||
| // After the fix the call is inside the try block, so filesystem errors are caught and | ||
| // routed through handleError with proper diffViewProvider.reset() cleanup. | ||
| mockedCreateDirectoriesForFile.mockRejectedValue( | ||
| Object.assign(new Error("EROFS: read-only file system, mkdir '/scratch'"), { code: "EROFS" }), | ||
| ) | ||
|
|
||
| await executeWriteFileTool({}, { fileExists: false }) | ||
|
|
||
| expect(mockHandleError).toHaveBeenCalledWith("writing file", expect.any(Error)) | ||
| expect(mockCline.diffViewProvider.reset).toHaveBeenCalled() | ||
| // The tool must not have proceeded to open or save | ||
| expect(mockCline.diffViewProvider.open).not.toHaveBeenCalled() | ||
| expect(mockCline.diffViewProvider.saveChanges).not.toHaveBeenCalled() | ||
| }, | ||
| ) | ||
|
|
||
| it.skipIf(process.platform === "win32")( | ||
| "finalizes partial tool message on error so the UI spinner does not get stuck", | ||
| async () => { | ||
| // Regression test: when a filesystem error is thrown in execute() the webview | ||
| // message created during handlePartial (or the early ask in execute) is stuck in | ||
| // partial: true state, showing an indefinite spinner alongside the error bubble. | ||
| // The catch block must call finalizePartialToolAsk() to close the spinner without | ||
| // blocking for user input. | ||
| mockedCreateDirectoriesForFile.mockRejectedValue( | ||
| Object.assign(new Error("EACCES: permission denied, mkdir '/ro'"), { code: "EACCES" }), | ||
| ) | ||
|
|
||
| await executeWriteFileTool({}, { fileExists: false }) | ||
|
|
||
| // handleError must still be called | ||
| expect(mockHandleError).toHaveBeenCalledWith("writing file", expect.any(Error)) | ||
|
|
||
| // finalizePartialToolAsk must have been called to dismiss the spinner | ||
| expect(mockCline.finalizePartialToolAsk).toHaveBeenCalled() | ||
| }, | ||
| ) | ||
| }) | ||
| }) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The target is identified by position (
at(-1)) with no correlation to the specific partial ask opened during streaming. Is there any async path between thetask.ask("tool", ..., block.partial)call inhandlePartialand the catch block that could push a new message (e.g.processQueuedMessages)? If so,at(-1)could silently no-op and leave the spinner stuck.