From c503c928f5862a8b5850041dcc7c2c01b7da6b86 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 02:51:22 +0000 Subject: [PATCH] Chat-loop: tell the model how long since its last reply The per-turn tag already gives the model a clock so it can answer "what year is it?" without guessing. It didn't tell the model whether the user is mid-thought ("you just answered") or reviving a thread from days ago - the model has no way to read that from the timestamps alone, and the user shouldn't have to spell it out every time. Extend the tag with an optional since_last_response="..." attribute carrying a coarse, conversational elapsed string ("about 22 hours", "yesterday", "about 3 days") computed against the most recent persisted assistant message's created_at. Chat.svelte walks its messages array for the latest role==='assistant' row that isn't marked for regenerate-from-here, and passes the timestamp through as a new ChatLoopOptions.lastAssistantTimestamp. The chat-loop recomputes the elapsed bucket every round (matching the existing datetime-tag recomputation) so a long multi-tool turn doesn't ship a stale value. Buckets are intentionally fuzzy: the model uses this to calibrate register, not to do arithmetic, and "yesterday" / "about 3 days" obviously read as a stale-thread revival while "a few minutes" / "just now" read as a live continuation. Synthetic ephemeral injections (intuition / context-recall blocks) are not persisted and therefore not eligible anchors - the semantic is "how long since you last actually replied to the user?". The attribute is omitted on the opening turn (no prior assistant to anchor against) and when the supplied timestamp doesn't parse, so a fresh thread never carries a misleading "just now". The system prompt's datetime block gains a paragraph teaching the model how to read the attribute and explicitly cautioning against quoting it back at the user verbatim or thanking them for the gap. 
--- docs/dev/chat.md | 19 +++++ src/lib/chat-loop.ts | 88 ++++++++++++++++++++- src/lib/chat-prompt.ts | 3 + src/screens/Chat.svelte | 22 ++++++ tests/chat-loop.test.ts | 165 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 293 insertions(+), 4 deletions(-) diff --git a/docs/dev/chat.md b/docs/dev/chat.md index 5b9c126..dabcaf6 100644 --- a/docs/dev/chat.md +++ b/docs/dev/chat.md @@ -320,6 +320,25 @@ A chat turn goes: every round of the loop, not once at send-time, so a long multi-tool turn reflects actual elapsed time. + On mid-thread turns the same tag also carries + `since_last_response="..."` (e.g. "about 22 hours", + "yesterday", "about 3 days"). The chat-loop receives the + most recent persisted assistant message's `created_at` via + the `lastAssistantTimestamp` option (Chat.svelte walks its + messages array for the latest role==='assistant' row that + isn't in `pendingDeleteSet`), and `formatRelativeDuration` + buckets the wall-clock delta into a coarse human-friendly + string. The model uses it to calibrate register - resume + vs. re-orient - on a thread the user revived hours or days + later. The attribute is OMITTED on the opening turn (no + prior assistant to anchor against) and when the supplied + timestamp doesn't parse, so a fresh thread never carries + a misleading "just now". Synthetic ephemeral injections + (intuition / context-recall `` blocks) aren't + persisted and therefore not eligible anchors - the semantic + is "how long since you last actually replied to the user?", + not "since any assistant-role row appeared on the wire." + The system prompt unconditionally mentions URL scraping so the model doesn't refuse "what does this page say?" 
with a generic "I can't browse the web" when a scraped page is sitting in the diff --git a/src/lib/chat-loop.ts b/src/lib/chat-loop.ts index 1d9dc81..b2ce2bd 100644 --- a/src/lib/chat-loop.ts +++ b/src/lib/chat-loop.ts @@ -402,6 +402,39 @@ export function buildThreadAttachmentsBlock( return lines.join('\n'); } +/** + * Format a millisecond duration as a coarse, conversational + * description of elapsed time. The output is intentionally fuzzy - + * the model uses this to calibrate its register ("you just asked" + * vs "it's been a while") rather than to do arithmetic, so a stepped + * bucket matches the LLM's actual decision boundary better than a + * precise "22 hours 14 minutes" string. + * + * Negative or non-finite input returns "just now" - clock skew (a + * persisted assistant row whose created_at is slightly in the + * future relative to the browser's `Date.now()` because the DB + * stamped it on the server side) shouldn't surface as a baffling + * "in the future" string in the prompt. + */ +function formatRelativeDuration(elapsedMs: number): string { + if (!Number.isFinite(elapsedMs) || elapsedMs < 0) return 'just now'; + const sec = Math.floor(elapsedMs / 1000); + if (sec < 120) return 'just now'; + const min = Math.floor(sec / 60); + if (min < 10) return 'a few minutes'; + if (min < 60) return `about ${min} minutes`; + const hr = Math.floor(min / 60); + if (hr < 24) return hr === 1 ? 
'about an hour' : `about ${hr} hours`; + const day = Math.floor(hr / 24); + if (day < 2) return 'yesterday'; + if (day < 14) return `about ${day} days`; + const week = Math.floor(day / 7); + if (day < 60) return `about ${week} weeks`; + const month = Math.floor(day / 30); + if (month < 12) return `about ${month} months`; + return 'over a year'; +} + /** * Build the per-turn `<datetime/>` tag that gets prepended to the * latest user turn (outside the `<user_message>` boundary, see @@ -420,6 +453,13 @@ export function buildThreadAttachmentsBlock( * - `zone`: the IANA zone name itself, so the model can name the * timezone in replies ("it's 3pm in America/Los_Angeles") and * so the value is self-describing if surfaced in logs. + * - `since_last_response`: coarse human-friendly elapsed time + * since the last persisted assistant message, e.g. "about 22 + * hours" or "yesterday". OMITTED on the opening turn of a + * thread (no prior assistant message to anchor against) and on + * any call that doesn't supply `lastAssistantTimestamp` - so a + * fresh thread doesn't carry a meaningless "just now" and older + * test fixtures keep matching their datetime regexes. * * This exists because LLMs have no clock - the model was trained * months ago, and without an injected datetime it either refuses * @@ -427,14 +467,20 @@ * outside `<user_message>` so the boundary contract from the system * prompt applies: anything outside the tags is platform-injected * metadata, not human input the model should echo or thank the - * user for. + * user for. The `since_last_response` attribute extends that + * contract to "how long since you last replied?" - the model can + * calibrate register (resume vs. greet, refresh context vs. assume + * shared state) without the user having to spell it out. * * Computed fresh per round in the chat-loop, not once at send-time.
* Multi-round tool loops can take 30+ seconds; recomputing every * round keeps the value honest if the model asks the user "what * time is it now?" mid-tool-loop on a long-running turn. */ -function buildDatetimeTag(tz: string | null | undefined): string { +function buildDatetimeTag( + tz: string | null | undefined, + lastAssistantTimestamp: string | null | undefined, +): string { const now = new Date(); // Drop sub-second precision: noisy in the prompt and the model // doesn't use millisecond resolution for anything. @@ -473,7 +519,17 @@ function buildDatetimeTag(tz: string | null | undefined): string { // calibration. zoneAttr = 'UTC'; } - return ``; + let sinceAttr = ''; + if (typeof lastAssistantTimestamp === 'string' && lastAssistantTimestamp.length > 0) { + const anchor = Date.parse(lastAssistantTimestamp); + // Date.parse returns NaN for an unparseable input (corrupt row, + // legacy timestamp shape). Skip the attribute rather than ship a + // garbage value - the boundary contract still holds without it. + if (Number.isFinite(anchor)) { + sinceAttr = ` since_last_response="${formatRelativeDuration(now.getTime() - anchor)}"`; + } + } + return ``; } /** @@ -892,6 +948,20 @@ export interface ChatLoopOptions { * clock time. */ journalTimezone?: string | null; + /** + * ISO 8601 `created_at` of the most recent persisted assistant + * message on the thread, used to compute the `since_last_response` + * attribute on the per-turn `` tag. Null / undefined on + * the opening turn of a thread (no prior assistant message); the + * datetime tag then omits the attribute rather than shipping a + * meaningless "just now". Caller (Chat.svelte) walks its + * `messages` array for the latest role==='assistant' row - + * synthetic ephemeral injections (intuition / context-recall + * blocks) are not persisted and therefore not eligible + * anchors, which is the correct semantic: "how long since your + * last actual reply to the user?". 
+ */ + lastAssistantTimestamp?: string | null; /** * Optional id of the user message that opened this turn. When set, * the chat-loop pairs it with the terminal assistant message id and @@ -1120,6 +1190,7 @@ export async function runChatLoop(opts: ChatLoopOptions): Promise` tag's since_last_response attribute, but the bucket +// thresholds (just-now / few-minutes / hour / day / week / month / +// year boundaries) are easier to verify directly than via a +// runChatLoop fixture per bucket. +export const __test = { + formatRelativeDuration, +}; diff --git a/src/lib/chat-prompt.ts b/src/lib/chat-prompt.ts index c341c75..959883b 100644 --- a/src/lib/chat-prompt.ts +++ b/src/lib/chat-prompt.ts @@ -230,6 +230,9 @@ A tag may also appear outside the That tag is the platform telling you the actual current wall-clock time at the moment this request was built; the local attribute is ISO 8601 in the user's configured timezone, utc is ISO 8601 in UTC, and zone is the IANA name. Treat it as authoritative when answering questions about the current date, day of the week, time of day, or year. Do NOT rely on training-cutoff knowledge for "what year is it?" or "what day is today?"; read the tag. +The tag may carry an additional since_last_response="..." attribute (e.g. "about 22 hours", "yesterday", "about 3 days") that tells you roughly how much wall-clock time has passed between your last reply on this thread and the user's current message. +Use it to calibrate register: a fresh continuation within minutes means picking up mid-thought; "yesterday" or "about 3 days" means the user is reviving an older conversation and may benefit from a brief reorientation rather than a context-free continuation. +Do NOT quote the elapsed string back at the user verbatim or thank them for the gap; treat it as silent context the same as the rest of the datetime tag. 
The attribute is absent on the opening turn of a thread (no prior assistant message to anchor against) - in that case there is simply no elapsed time to consider. `; // System reminder channel. Trailing `role: 'system'` messages were getting diff --git a/src/screens/Chat.svelte b/src/screens/Chat.svelte index d1aba4b..3d01233 100644 --- a/src/screens/Chat.svelte +++ b/src/screens/Chat.svelte @@ -2416,6 +2416,27 @@ .map((m) => toVeniceMessage(m, { visionSpec: ctx.tierSpec })), ]; + // Anchor for the `<datetime/>` tag's since_last_response attribute. + // Walk the persisted messages from the end and return the + // created_at of the most recent role==='assistant' row that isn't + // marked for regenerate-from-here (pendingDeleteSet) - those rows + // are about to be replaced and shouldn't count as "your last + // reply". null on the opening turn (no prior assistant) and on a + // regenerate that drops every prior assistant row; the chat-loop + // omits the attribute in both cases. Recomputed at call time so + // an auto-retry after a 429 sees newly-persisted assistant rows + // from earlier tool rounds (the chat-loop persists mid-turn + // assistant rows before any final-text row lands). + const findLastAssistantTimestamp = (): string | null => { + for (let i = messages.length - 1; i >= 0; i--) { + const m = messages[i]; + if (m.role !== 'assistant') continue; + if (pendingDeleteSet.has(m.id)) continue; + return m.created_at; + } + return null; + }; + // Throttle streamingText updates to ~2Hz while the response // arrives.
Every assignment drives to re-run marked // + DOMPurify + highlight.js over the full growing buffer, so @@ -2511,6 +2532,7 @@ userName: ctx.sendUserName, userLocation: ctx.sendUserLocation, journalTimezone: app.journalTimezone || null, + lastAssistantTimestamp: findLastAssistantTimestamp(), intuitionModelId: agentModel('intuition').id, intuitionMood: intuitionMoodArg, // Topic-boundary recall rides the same trigger machinery as diff --git a/tests/chat-loop.test.ts b/tests/chat-loop.test.ts index c9c735b..b15b5f7 100644 --- a/tests/chat-loop.test.ts +++ b/tests/chat-loop.test.ts @@ -17,6 +17,7 @@ import { toVeniceMessage, INTERRUPTED_MARKER, buildThreadAttachmentsBlock, + __test as chatLoopTest, } from '../src/lib/chat-loop'; import type { ChatCompletion, @@ -460,6 +461,77 @@ describe('toVeniceMessage', () => { }); }); +describe('formatRelativeDuration', () => { + // Buckets matching the formatter's decision tree. The model uses + // these as a register-calibration signal, not for arithmetic, so + // the values are deliberately coarse - the goal is for "yesterday" + // and "about 3 days" to obviously read as a stale-thread revival + // while "a few minutes" and "just now" read as a live continuation. + const fmt = chatLoopTest.formatRelativeDuration; + const SEC = 1000; + const MIN = 60 * SEC; + const HR = 60 * MIN; + const DAY = 24 * HR; + + it('returns "just now" for <2 minutes and for clock skew', () => { + expect(fmt(0)).toBe('just now'); + expect(fmt(30 * SEC)).toBe('just now'); + expect(fmt(119 * SEC)).toBe('just now'); + // Negative ms = anchor is in the future relative to now. Can + // happen when the DB stamped created_at server-side and the + // browser clock is slightly behind; ship "just now" rather than + // "in 4 seconds". 
+ expect(fmt(-5 * SEC)).toBe('just now'); + expect(fmt(NaN)).toBe('just now'); + }); + + it('returns "a few minutes" for 2-10 minutes', () => { + expect(fmt(2 * MIN)).toBe('a few minutes'); + expect(fmt(9 * MIN)).toBe('a few minutes'); + }); + + it('returns "about N minutes" for 10-60 minutes', () => { + expect(fmt(10 * MIN)).toBe('about 10 minutes'); + expect(fmt(45 * MIN)).toBe('about 45 minutes'); + expect(fmt(59 * MIN)).toBe('about 59 minutes'); + }); + + it('singularizes the hour bucket', () => { + expect(fmt(60 * MIN)).toBe('about an hour'); + expect(fmt(119 * MIN)).toBe('about an hour'); + }); + + it('returns "about N hours" for 2-24 hours', () => { + expect(fmt(2 * HR)).toBe('about 2 hours'); + expect(fmt(22 * HR)).toBe('about 22 hours'); + }); + + it('returns "yesterday" for 24-48 hours', () => { + expect(fmt(24 * HR)).toBe('yesterday'); + expect(fmt(36 * HR)).toBe('yesterday'); + }); + + it('returns "about N days" for 2-14 days', () => { + expect(fmt(2 * DAY)).toBe('about 2 days'); + expect(fmt(13 * DAY)).toBe('about 13 days'); + }); + + it('returns "about N weeks" for 14-60 days', () => { + expect(fmt(14 * DAY)).toBe('about 2 weeks'); + expect(fmt(59 * DAY)).toBe('about 8 weeks'); + }); + + it('returns "about N months" for 60 days to a year', () => { + expect(fmt(60 * DAY)).toBe('about 2 months'); + expect(fmt(300 * DAY)).toBe('about 10 months'); + }); + + it('returns "over a year" beyond ~12 months', () => { + expect(fmt(365 * DAY)).toBe('over a year'); + expect(fmt(5 * 365 * DAY)).toBe('over a year'); + }); +}); + describe('runChatLoop', () => { it('forwards reasoningEffort to every streamChat call', async () => { // The loop doesn't gate on ModelSpec.supportsReasoning — that's the @@ -936,6 +1008,99 @@ describe('runChatLoop', () => { expect(zone.length).toBeGreaterThan(0); }); + it('omits since_last_response from the datetime tag on the opening turn', async () => { + // Opening turn = no prior assistant message to anchor against, so + // the caller 
passes lastAssistantTimestamp=null (or omits it). + // Shipping "just now" or any other value here would be a lie - + // there is no prior reply. Match the tag with an immediate `/>` + // closer to assert the attribute is absent. + const seenRequests: ChatRequest[] = []; + const venice = { + async *streamChat(req: ChatRequest): AsyncGenerator { + seenRequests.push(req); + yield { type: 'text', delta: 'ok' }; + }, + } as unknown as VeniceClient; + const { svc } = mockSupabase(); + await runChatLoop({ + venice, + supabase: svc, + thread: mkThread(), + userId: 'u-1', + modelId: 'm', + history: [{ role: 'user', content: 'hi' }], + lastAssistantTimestamp: null, + signal: new AbortController().signal, + }); + const userMsg = seenRequests[0].messages.find((m) => m.role === 'user'); + const content = userMsg?.content as string; + expect(content).toMatch( + /^\nhi<\/user_message>$/, + ); + expect(content.includes('since_last_response')).toBe(false); + }); + + it('includes since_last_response in the datetime tag when lastAssistantTimestamp is supplied', async () => { + // Mid-thread turns carry a coarse human-friendly elapsed string + // so the model can calibrate register ("you just answered" vs + // "it's been a few days"). We assert the attribute lands with a + // sensible bucket - exact wording is the formatter's job and is + // covered separately. + const seenRequests: ChatRequest[] = []; + const venice = { + async *streamChat(req: ChatRequest): AsyncGenerator { + seenRequests.push(req); + yield { type: 'text', delta: 'ok' }; + }, + } as unknown as VeniceClient; + const { svc } = mockSupabase(); + // 22 hours ago - "about 22 hours" bucket. 
+ const anchor = new Date(Date.now() - 22 * 60 * 60 * 1000).toISOString(); + await runChatLoop({ + venice, + supabase: svc, + thread: mkThread(), + userId: 'u-1', + modelId: 'm', + history: [{ role: 'user', content: 'hi' }], + lastAssistantTimestamp: anchor, + signal: new AbortController().signal, + }); + const userMsg = seenRequests[0].messages.find((m) => m.role === 'user'); + const content = userMsg?.content as string; + expect(content).toMatch( + /^\nhi<\/user_message>$/, + ); + }); + + it('omits since_last_response when lastAssistantTimestamp is unparseable', async () => { + // A corrupt / unexpected timestamp string (Date.parse returns NaN) + // is treated as "no anchor available" rather than emitting a + // garbage value. The rest of the datetime tag still rides; only + // the elapsed attribute is dropped. + const seenRequests: ChatRequest[] = []; + const venice = { + async *streamChat(req: ChatRequest): AsyncGenerator { + seenRequests.push(req); + yield { type: 'text', delta: 'ok' }; + }, + } as unknown as VeniceClient; + const { svc } = mockSupabase(); + await runChatLoop({ + venice, + supabase: svc, + thread: mkThread(), + userId: 'u-1', + modelId: 'm', + history: [{ role: 'user', content: 'hi' }], + lastAssistantTimestamp: 'not-a-date', + signal: new AbortController().signal, + }); + const userMsg = seenRequests[0].messages.find((m) => m.role === 'user'); + const content = userMsg?.content as string; + expect(content.includes('since_last_response')).toBe(false); + }); + it('persists a plain text response in one round', async () => { const venice = mockVenice([ [{ type: 'text', delta: 'Hello' }, { type: 'text', delta: ' there' }],