From a130d28e06939d697345a5a4d3c457894974e776 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Mon, 16 Mar 2026 15:29:58 +0100 Subject: [PATCH 1/7] todo: fix TOCTOU race by replacing FindByID+Update with atomic UpdateByID The previous code called FindByID (which acquires and releases a lock) and then Update by index (which acquires and releases a separate lock). Between these two operations, a concurrent insertion or deletion could shift indices, causing the update to modify the wrong todo item. Replace the two-step FindByID/Update with a single UpdateByID method that atomically finds and updates under one lock hold. Add a corresponding FindAndUpdate method to concurrent.Slice. Assisted-By: docker-agent --- pkg/concurrent/slice.go | 15 +++++++++++++++ pkg/tools/builtin/todo.go | 28 ++++++++++------------------ 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/pkg/concurrent/slice.go b/pkg/concurrent/slice.go index 539c523ff..bee11541d 100644 --- a/pkg/concurrent/slice.go +++ b/pkg/concurrent/slice.go @@ -92,6 +92,21 @@ func (s *Slice[V]) Update(index int, f func(V) V) bool { return true } +// FindAndUpdate atomically finds the first element matching the predicate +// and applies f to it. It returns true if an element was found and updated. +func (s *Slice[V]) FindAndUpdate(predicate func(V) bool, f func(V) V) bool { + s.mu.Lock() + defer s.mu.Unlock() + + for i, v := range s.values { + if predicate(v) { + s.values[i] = f(s.values[i]) + return true + } + } + return false +} + func (s *Slice[V]) Clear() { s.mu.Lock() defer s.mu.Unlock() diff --git a/pkg/tools/builtin/todo.go b/pkg/tools/builtin/todo.go index 6d412a716..2f55bb7bc 100644 --- a/pkg/tools/builtin/todo.go +++ b/pkg/tools/builtin/todo.go @@ -86,10 +86,9 @@ type TodoStorage interface { All() []Todo // Len returns the number of todo items. Len() int - // FindByID returns the index of the todo with the given ID, or -1 if not found. - FindByID(id string) int - // Update modifies the todo at the given index using the provided function. - Update(index int, fn func(Todo) Todo) + // UpdateByID atomically finds a todo by ID and applies fn to it. + // It returns true if the todo was found and updated, false otherwise. + UpdateByID(id string, fn func(Todo) Todo) bool // Clear removes all todo items. Clear() } @@ -117,13 +116,8 @@ func (s *MemoryTodoStorage) Len() int { return s.todos.Length() } -func (s *MemoryTodoStorage) FindByID(id string) int { - _, idx := s.todos.Find(func(t Todo) bool { return t.ID == id }) - return idx -} - -func (s *MemoryTodoStorage) Update(index int, fn func(Todo) Todo) { - s.todos.Update(index, fn) +func (s *MemoryTodoStorage) UpdateByID(id string, fn func(Todo) Todo) bool { + return s.todos.FindAndUpdate(func(t Todo) bool { return t.ID == id }, fn) } func (s *MemoryTodoStorage) Clear() { @@ -222,16 +216,14 @@ func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*t result := UpdateTodosOutput{} for _, update := range params.Updates { - idx := h.storage.FindByID(update.ID) - if idx == -1 { - result.NotFound = append(result.NotFound, update.ID) - continue - } - - h.storage.Update(idx, func(t Todo) Todo { + ok := h.storage.UpdateByID(update.ID, func(t Todo) Todo { t.Status = update.Status return t }) + if !ok { + result.NotFound = append(result.NotFound, update.ID) + continue + } result.Updated = append(result.Updated, update) } From 6c544ca3df73a035be5aa6bff9daf6eefaac299d Mon Sep 17 00:00:00 2001 From: David Gageot Date: Mon, 16 Mar 2026 15:31:01 +0100 Subject: [PATCH 2/7] todo: eliminate redundant storage.All() clones per handler call Each handler was calling h.storage.All() up to 3 times: once for the output struct's AllTodos field, once inside incompleteReminder(), and once in jsonResult() for Meta. Each call acquires a read lock and clones the entire slice. Refactor to call All() once per handler invocation, passing the snapshot to incompleteReminder (now a plain function) and to jsonResult. This also fixes the error path in updateTodos where AllTodos and Reminder were not populated when all updates failed (not-found). Assisted-By: docker-agent --- pkg/tools/builtin/todo.go | 40 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/pkg/tools/builtin/todo.go b/pkg/tools/builtin/todo.go index 2f55bb7bc..c4302235a 100644 --- a/pkg/tools/builtin/todo.go +++ b/pkg/tools/builtin/todo.go @@ -179,25 +179,26 @@ func (h *todoHandler) addTodo(description string) Todo { return todo } -// jsonResult builds a ToolCallResult with a JSON-serialized output and current storage as Meta. -func (h *todoHandler) jsonResult(v any) (*tools.ToolCallResult, error) { +// jsonResult builds a ToolCallResult with a JSON-serialized output and allTodos as Meta. +func (h *todoHandler) jsonResult(v any, allTodos []Todo) (*tools.ToolCallResult, error) { out, err := json.Marshal(v) if err != nil { return nil, fmt.Errorf("marshaling todo output: %w", err) } return &tools.ToolCallResult{ Output: string(out), - Meta: h.storage.All(), + Meta: allTodos, }, nil } func (h *todoHandler) createTodo(_ context.Context, params CreateTodoArgs) (*tools.ToolCallResult, error) { created := h.addTodo(params.Description) + allTodos := h.storage.All() return h.jsonResult(CreateTodoOutput{ Created: created, - AllTodos: h.storage.All(), - Reminder: h.incompleteReminder(), - }) + AllTodos: allTodos, + Reminder: incompleteReminder(allTodos), + }, allTodos) } func (h *todoHandler) createTodos(_ context.Context, params CreateTodosArgs) (*tools.ToolCallResult, error) { @@ -205,11 +206,12 @@ func (h *todoHandler) createTodos(_ context.Context, params CreateTodosArgs) (*t for _, desc := range params.Descriptions { created = append(created, h.addTodo(desc)) } + allTodos := h.storage.All() return h.jsonResult(CreateTodosOutput{ Created: created, - AllTodos: h.storage.All(), - Reminder: h.incompleteReminder(), - }) + AllTodos: allTodos, + Reminder: incompleteReminder(allTodos), + }, allTodos) } func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*tools.ToolCallResult, error) { @@ -227,8 +229,12 @@ func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*t result.Updated = append(result.Updated, update) } + allTodos := h.storage.All() + result.AllTodos = allTodos + result.Reminder = incompleteReminder(allTodos) + if len(result.NotFound) > 0 && len(result.Updated) == 0 { - res, err := h.jsonResult(result) + res, err := h.jsonResult(result, allTodos) if err != nil { return nil, err } @@ -236,16 +242,12 @@ func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*t return res, nil } - result.AllTodos = h.storage.All() - result.Reminder = h.incompleteReminder() - - return h.jsonResult(result) + return h.jsonResult(result, allTodos) } // incompleteReminder returns a reminder string listing any non-completed todos, -// or an empty string if all are completed (or storage is empty). -func (h *todoHandler) incompleteReminder() string { - all := h.storage.All() +// or an empty string if all are completed (or the list is empty). +func incompleteReminder(all []Todo) string { var pending, inProgress []string for _, todo := range all { switch todo.Status { @@ -276,8 +278,8 @@ func (h *todoHandler) listTodos(_ context.Context, _ tools.ToolCall) (*tools.Too todos = []Todo{} } out := ListTodosOutput{Todos: todos} - out.Reminder = h.incompleteReminder() - return h.jsonResult(out) + out.Reminder = incompleteReminder(todos) + return h.jsonResult(out, todos) } func (t *TodoTool) Tools(context.Context) ([]tools.Tool, error) { From 4497f0fe1d26525f06954dd6fc2c28ac4a9e4e6d Mon Sep 17 00:00:00 2001 From: David Gageot Date: Mon, 16 Mar 2026 15:31:55 +0100 Subject: [PATCH 3/7] todo: simplify incompleteReminder with single-pass builder writes Replace the two intermediate string slices and fmt.Sprintf+concatenation with a single pass that writes directly to the strings.Builder. This avoids temporary string allocations from the intermediate slices and from the "+" concatenation in WriteString calls. Assisted-By: docker-agent --- pkg/tools/builtin/todo.go | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/pkg/tools/builtin/todo.go b/pkg/tools/builtin/todo.go index c4302235a..cd513468d 100644 --- a/pkg/tools/builtin/todo.go +++ b/pkg/tools/builtin/todo.go @@ -248,26 +248,22 @@ func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*t // incompleteReminder returns a reminder string listing any non-completed todos, // or an empty string if all are completed (or the list is empty). func incompleteReminder(all []Todo) string { - var pending, inProgress []string + var b strings.Builder for _, todo := range all { + var prefix string switch todo.Status { case "pending": - pending = append(pending, fmt.Sprintf("[%s] %s", todo.ID, todo.Description)) + prefix = " (pending) " case "in-progress": - inProgress = append(inProgress, fmt.Sprintf("[%s] %s", todo.ID, todo.Description)) + prefix = " (in-progress) " + default: + continue } - } - if len(pending) == 0 && len(inProgress) == 0 { - return "" - } - - var b strings.Builder - b.WriteString("The following todos are still incomplete and MUST be completed:") - for _, s := range inProgress { - b.WriteString(" (in-progress) " + s) - } - for _, s := range pending { - b.WriteString(" (pending) " + s) + if b.Len() == 0 { + b.WriteString("The following todos are still incomplete and MUST be completed:") + } + b.WriteString(prefix) + fmt.Fprintf(&b, "[%s] %s", todo.ID, todo.Description) } return b.String() } From 10225866862047e88aa70be7e08ce6fce4afc1f7 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Mon, 16 Mar 2026 15:33:21 +0100 Subject: [PATCH 4/7] todo: validate status values in updateTodos before mutating Previously, updateTodos accepted any string as a status value without validation. An LLM could pass "done" or "finished" and silently corrupt the todo state, making incompleteReminder unable to detect those items. Add upfront validation against the set of allowed statuses (pending, in-progress, completed). If any update contains an invalid status, return an error result before performing any mutations, preventing partial updates with bad data. Assisted-By: docker-agent --- pkg/tools/builtin/todo.go | 22 ++++++++++++++++++++++ pkg/tools/builtin/todo_test.go | 22 ++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/pkg/tools/builtin/todo.go b/pkg/tools/builtin/todo.go index cd513468d..9d7df93af 100644 --- a/pkg/tools/builtin/todo.go +++ b/pkg/tools/builtin/todo.go @@ -214,7 +214,29 @@ func (h *todoHandler) createTodos(_ context.Context, params CreateTodosArgs) (*t }, allTodos) } +// validTodoStatuses defines the set of allowed todo statuses. +var validTodoStatuses = map[string]bool{ + "pending": true, + "in-progress": true, + "completed": true, +} + func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*tools.ToolCallResult, error) { + for _, update := range params.Updates { + if !validTodoStatuses[update.Status] { + errMsg := fmt.Sprintf("invalid status %q for todo %s: must be one of pending, in-progress, completed", update.Status, update.ID) + out, err := json.Marshal(map[string]string{"error": errMsg}) + if err != nil { + return nil, fmt.Errorf("marshaling todo error: %w", err) + } + return &tools.ToolCallResult{ + Output: string(out), + IsError: true, + Meta: h.storage.All(), + }, nil + } + } + result := UpdateTodosOutput{} for _, update := range params.Updates { diff --git a/pkg/tools/builtin/todo_test.go b/pkg/tools/builtin/todo_test.go index b17da8103..98860e9ad 100644 --- a/pkg/tools/builtin/todo_test.go +++ b/pkg/tools/builtin/todo_test.go @@ -230,6 +230,28 @@ func TestTodoTool_UpdateTodos_AllNotFound(t *testing.T) { assert.Equal(t, "nonexistent2", output.NotFound[1]) } +func TestTodoTool_UpdateTodos_InvalidStatus(t *testing.T) { + storage := NewMemoryTodoStorage() + tool := NewTodoTool(WithStorage(storage)) + + _, err := tool.handler.createTodo(t.Context(), CreateTodoArgs{Description: "Task"}) + require.NoError(t, err) + + result, err := tool.handler.updateTodos(t.Context(), UpdateTodosArgs{ + Updates: []TodoUpdate{ + {ID: "todo_1", Status: "done"}, + }, + }) + require.NoError(t, err) + assert.True(t, result.IsError) + assert.Contains(t, result.Output, "done") + + // Storage should be unchanged — no partial mutation. + todos := storage.All() + require.Len(t, todos, 1) + assert.Equal(t, "pending", todos[0].Status) +} + func TestTodoTool_UpdateTodos_AllCompleted_NoAutoRemoval(t *testing.T) { storage := NewMemoryTodoStorage() tool := NewTodoTool(WithStorage(storage)) From ad467c8d3f0fac72f3f4d0772db8dad8161d1931 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Mon, 16 Mar 2026 15:34:23 +0100 Subject: [PATCH 5/7] todo: move nextID counter from todoHandler into TodoStorage The ID counter lived on todoHandler while the data lived in storage. This created two problems: 1. If storage.Clear() was called, the counter kept incrementing from where it left off, creating a gap in IDs. 2. If storage was shared between multiple handlers (via WithStorage), each handler had its own independent counter, which could produce duplicate IDs. Move the counter into TodoStorage (and MemoryTodoStorage) via a new NextID() method so that ID generation is always coupled with the data it identifies. Assisted-By: docker-agent --- pkg/tools/builtin/todo.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pkg/tools/builtin/todo.go b/pkg/tools/builtin/todo.go index 9d7df93af..ee5f19370 100644 --- a/pkg/tools/builtin/todo.go +++ b/pkg/tools/builtin/todo.go @@ -86,6 +86,8 @@ type TodoStorage interface { All() []Todo // Len returns the number of todo items. Len() int + // NextID returns a unique, monotonically increasing ID for a new todo. + NextID() int64 // UpdateByID atomically finds a todo by ID and applies fn to it. // It returns true if the todo was found and updated, false otherwise. UpdateByID(id string, fn func(Todo) Todo) bool @@ -95,7 +97,8 @@ type TodoStorage interface { // MemoryTodoStorage is an in-memory, concurrency-safe implementation of TodoStorage. type MemoryTodoStorage struct { - todos *concurrent.Slice[Todo] + todos *concurrent.Slice[Todo] + nextID atomic.Int64 } func NewMemoryTodoStorage() *MemoryTodoStorage { @@ -116,6 +119,10 @@ func (s *MemoryTodoStorage) Len() int { return s.todos.Length() } +func (s *MemoryTodoStorage) NextID() int64 { + return s.nextID.Add(1) +} + func (s *MemoryTodoStorage) UpdateByID(id string, fn func(Todo) Todo) bool { return s.todos.FindAndUpdate(func(t Todo) bool { return t.ID == id }, fn) } @@ -140,7 +147,6 @@ func WithStorage(storage TodoStorage) TodoOption { type todoHandler struct { storage TodoStorage - nextID atomic.Int64 } var NewSharedTodoTool = sync.OnceValue(func() *TodoTool { return NewTodoTool() }) @@ -171,7 +177,7 @@ Track task progress with todos: // addTodo creates a new todo and adds it to storage. func (h *todoHandler) addTodo(description string) Todo { todo := Todo{ - ID: fmt.Sprintf("todo_%d", h.nextID.Add(1)), + ID: fmt.Sprintf("todo_%d", h.storage.NextID()), Description: description, Status: "pending", } From 051f513ba6b2308c876c6888a6a2621bc42a62c9 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Mon, 16 Mar 2026 15:35:06 +0100 Subject: [PATCH 6/7] todo: guarantee non-nil slice from MemoryTodoStorage.All() Previously, only listTodos had a nil guard for the slice returned by All(), while createTodo, createTodos, and updateTodos did not. If storage was cleared, those handlers would serialize null instead of [] for the AllTodos JSON field. Move the nil-to-empty-slice normalization into MemoryTodoStorage.All() so all callers consistently get a non-nil slice, and remove the now-redundant guard from listTodos. Assisted-By: docker-agent --- pkg/tools/builtin/todo.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pkg/tools/builtin/todo.go b/pkg/tools/builtin/todo.go index ee5f19370..d55ced948 100644 --- a/pkg/tools/builtin/todo.go +++ b/pkg/tools/builtin/todo.go @@ -112,7 +112,11 @@ func (s *MemoryTodoStorage) Add(todo Todo) { } func (s *MemoryTodoStorage) All() []Todo { - return s.todos.All() + all := s.todos.All() + if all == nil { + return []Todo{} + } + return all } func (s *MemoryTodoStorage) Len() int { @@ -298,9 +302,6 @@ func incompleteReminder(all []Todo) string { func (h *todoHandler) listTodos(_ context.Context, _ tools.ToolCall) (*tools.ToolCallResult, error) { todos := h.storage.All() - if todos == nil { - todos = []Todo{} - } out := ListTodosOutput{Todos: todos} out.Reminder = incompleteReminder(todos) return h.jsonResult(out, todos) From c8a76000444decaba74e7cfac201a41f88c72259 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Mon, 16 Mar 2026 15:42:15 +0100 Subject: [PATCH 7/7] todo: use UpdateTodosOutput for status validation errors The validation error path was returning a raw map[string]string{"error": ...} which did not conform to the tool's declared OutputSchema (UpdateTodosOutput). A consumer parsing the output as UpdateTodosOutput would get zero-value fields instead of a structured error. Return a proper UpdateTodosOutput with AllTodos populated and the error message in the Reminder field, matching the response shape of all other code paths. Also fixes an extra h.storage.All() clone that was not following the single-snapshot pattern. Assisted-By: docker-agent --- pkg/tools/builtin/todo.go | 23 ++++++++++++----------- pkg/tools/builtin/todo_test.go | 8 +++++++- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/pkg/tools/builtin/todo.go b/pkg/tools/builtin/todo.go index d55ced948..1651cb231 100644 --- a/pkg/tools/builtin/todo.go +++ b/pkg/tools/builtin/todo.go @@ -233,18 +233,19 @@ var validTodoStatuses = map[string]bool{ func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*tools.ToolCallResult, error) { for _, update := range params.Updates { - if !validTodoStatuses[update.Status] { - errMsg := fmt.Sprintf("invalid status %q for todo %s: must be one of pending, in-progress, completed", update.Status, update.ID) - out, err := json.Marshal(map[string]string{"error": errMsg}) - if err != nil { - return nil, fmt.Errorf("marshaling todo error: %w", err) - } - return &tools.ToolCallResult{ - Output: string(out), - IsError: true, - Meta: h.storage.All(), - }, nil + if validTodoStatuses[update.Status] { + continue + } + allTodos := h.storage.All() + res, err := h.jsonResult(UpdateTodosOutput{ + AllTodos: allTodos, + Reminder: fmt.Sprintf("invalid status %q for todo %s: must be one of pending, in-progress, completed", update.Status, update.ID), + }, allTodos) + if err != nil { + return nil, err } + res.IsError = true + return res, nil } result := UpdateTodosOutput{} diff --git a/pkg/tools/builtin/todo_test.go b/pkg/tools/builtin/todo_test.go index 98860e9ad..f35fd45b5 100644 --- a/pkg/tools/builtin/todo_test.go +++ b/pkg/tools/builtin/todo_test.go @@ -244,7 +244,13 @@ func TestTodoTool_UpdateTodos_InvalidStatus(t *testing.T) { }) require.NoError(t, err) assert.True(t, result.IsError) - assert.Contains(t, result.Output, "done") + + var output UpdateTodosOutput + require.NoError(t, json.Unmarshal([]byte(result.Output), &output)) + assert.Contains(t, output.Reminder, "done") + assert.Empty(t, output.Updated) + assert.Empty(t, output.NotFound) + require.Len(t, output.AllTodos, 1) // Storage should be unchanged — no partial mutation. todos := storage.All()