sysread · sysread · May 12, 2026 · May 12, 2026
diff --git a/docs/dev/chat.md b/docs/dev/chat.md
@@ -320,6 +320,25 @@ A chat turn goes:
   every round of the loop, not once at send-time, so a long
   multi-tool turn reflects actual elapsed time.
 
+  On mid-thread turns the same tag also carries
+  `since_last_response="..."` (e.g. "about 22 hours",
+  "yesterday", "about 3 days"). The chat-loop receives the
+  most recent persisted assistant message's `created_at` via
+  the `lastAssistantTimestamp` option (Chat.svelte walks its
+  messages array for the latest role==='assistant' row that
+  isn't in `pendingDeleteSet`), and `formatRelativeDuration`
+  buckets the wall-clock delta into a coarse human-friendly
+  string. The model uses it to calibrate register - resume
+  vs. re-orient - on a thread the user revived hours or days
+  later. The attribute is OMITTED on the opening turn (no
+  prior assistant to anchor against) and when the supplied
+  timestamp doesn't parse, so a fresh thread never carries
+  a misleading "just now". Synthetic ephemeral injections
+  (intuition / context-recall `<think>` blocks) aren't
+  persisted and are therefore not eligible anchors - the
+  question being answered is "how long since you last actually
+  replied to the user?", not "how long since any assistant-role
+  row appeared on the wire?"
+
   The system prompt unconditionally mentions URL scraping so the
   model doesn't refuse "what does this page say?" with a generic
   "I can't browse the web" when a scraped page is sitting in the

diff --git a/src/lib/chat-loop.ts b/src/lib/chat-loop.ts
@@ -402,6 +402,39 @@ export function buildThreadAttachmentsBlock(
   return lines.join('\n');
 }
 
+/**
+ * Format a millisecond duration as a coarse, conversational
+ * description of elapsed time. The output is intentionally fuzzy -
+ * the model uses this to calibrate its register ("you just asked"
+ * vs "it's been a while") rather than to do arithmetic, so a stepped
+ * bucket matches the LLM's actual decision boundary better than a
+ * precise "22 hours 14 minutes" string.
+ *
+ * Buckets (every unit is floored, never rounded up):
+ *   - under 2 minutes  -> "just now"
+ *   - 2-9 minutes      -> "a few minutes"
+ *   - 10-59 minutes    -> "about N minutes"
+ *   - 1-23 hours       -> "about an hour" / "about N hours"
+ *   - 24-47 hours      -> "yesterday"
+ *   - 2-13 days        -> "about N days"
+ *   - 14-59 days       -> "about N weeks" (7-day weeks)
+ *   - 60-364 days      -> "about N months" (30-day months)
+ *   - 365 days or more -> "over a year"
+ *
+ * Negative or non-finite input returns "just now" - clock skew (a
+ * persisted assistant row whose created_at is slightly in the
+ * future relative to the browser's `Date.now()` because the DB
+ * stamped it on the server side) shouldn't surface as a baffling
+ * "in the future" string in the prompt.
+ *
+ * @param elapsedMs - elapsed wall-clock time in milliseconds.
+ * @returns one of the bucket strings listed above; never empty.
+ */
+function formatRelativeDuration(elapsedMs: number): string {
+  if (!Number.isFinite(elapsedMs) || elapsedMs < 0) return 'just now';
+  const sec = Math.floor(elapsedMs / 1000);
+  if (sec < 120) return 'just now';
+  const min = Math.floor(sec / 60);
+  if (min < 10) return 'a few minutes';
+  if (min < 60) return `about ${min} minutes`;
+  const hr = Math.floor(min / 60);
+  if (hr < 24) return hr === 1 ? 'about an hour' : `about ${hr} hours`;
+  const day = Math.floor(hr / 24);
+  if (day < 2) return 'yesterday';
+  if (day < 14) return `about ${day} days`;
+  if (day < 60) return `about ${Math.floor(day / 7)} weeks`;
+  // Gate the month bucket on days, not on the 30-day month count:
+  // 12 * 30 = 360, so a `month < 12` check would report 360-364 days
+  // as "over a year" before a year has actually elapsed.
+  if (day < 365) return `about ${Math.floor(day / 30)} months`;
+  return 'over a year';
+}
+
 /**
  * Build the per-turn `<datetime>` tag that gets prepended to the
  * latest user turn (outside the `<user_message>` boundary, see
@@ -420,21 +453,34 @@ export function buildThreadAttachmentsBlock(
  *   - `zone`: the IANA zone name itself, so the model can name the
  *     timezone in replies ("it's 3pm in America/Los_Angeles") and
  *     so the value is self-describing if surfaced in logs.
+ *   - `since_last_response`: coarse human-friendly elapsed time
+ *     since the last persisted assistant message, e.g. "about 22
+ *     hours" or "yesterday". OMITTED on the opening turn of a
+ *     thread (no prior assistant message to anchor against), on
+ *     any call that doesn't supply `lastAssistantTimestamp`, and
+ *     when the supplied timestamp is empty or doesn't parse - so a
+ *     fresh thread doesn't carry a meaningless "just now" and older
+ *     test fixtures keep matching their datetime regexes.
  *
  * This exists because LLMs have no clock - the model was trained
  * months ago, and without an injected datetime it either refuses
  * "what year is it?" or hallucinates a stale answer. The tag rides
  * outside `<user_message>` so the boundary contract from the system
  * prompt applies: anything outside the tags is platform-injected
  * metadata, not human input the model should echo or thank the
- * user for.
+ * user for. The `since_last_response` attribute extends that
+ * contract to "how long since you last replied?" - the model can
+ * calibrate register (resume vs. greet, refresh context vs. assume
+ * shared state) without the user having to spell it out.
  *
  * Computed fresh per round in the chat-loop, not once at send-time.
  * Multi-round tool loops can take 30+ seconds; recomputing every
  * round keeps the value honest if the model asks the user "what
  * time is it now?" mid-tool-loop on a long-running turn.
  */
-function buildDatetimeTag(tz: string | null | undefined): string {
+function buildDatetimeTag(
+  tz: string | null | undefined,
+  lastAssistantTimestamp: string | null | undefined,
+): string {
   const now = new Date();
   // Drop sub-second precision: noisy in the prompt and the model
   // doesn't use millisecond resolution for anything.
@@ -473,7 +519,17 @@ function buildDatetimeTag(tz: string | null | undefined): string {
     // calibration.
     zoneAttr = 'UTC';
   }
-  return `<datetime local="${local}" utc="${utc}" zone="${zoneAttr}" />`;
+  let sinceAttr = '';
+  if (typeof lastAssistantTimestamp === 'string' && lastAssistantTimestamp.length > 0) {
+    const anchor = Date.parse(lastAssistantTimestamp);
+    // Date.parse returns NaN for an unparseable input (corrupt row,
+    // legacy timestamp shape). Skip the attribute rather than ship a
+    // garbage value - the boundary contract still holds without it.
+    if (Number.isFinite(anchor)) {
+      sinceAttr = ` since_last_response="${formatRelativeDuration(now.getTime() - anchor)}"`;
+    }
+  }
+  return `<datetime local="${local}" utc="${utc}" zone="${zoneAttr}"${sinceAttr} />`;
 }
 
 /**
@@ -892,6 +948,20 @@ export interface ChatLoopOptions {
    * clock time.
    */
   journalTimezone?: string | null;
+  /**
+   * ISO 8601 `created_at` of the most recent persisted assistant
+   * message on the thread, used to compute the `since_last_response`
+   * attribute on the per-turn `<datetime>` tag. Null / undefined on
+   * the opening turn of a thread (no prior assistant message); the
+   * datetime tag then omits the attribute rather than shipping a
+   * meaningless "just now". Caller (Chat.svelte) walks its
+   * `messages` array for the latest role==='assistant' row -
+   * synthetic ephemeral injections (intuition / context-recall
+   * <think> blocks) are not persisted and therefore not eligible
+   * anchors, which is the correct semantic: "how long since your
+   * last actual reply to the user?".
+   */
+  lastAssistantTimestamp?: string | null;
   /**
    * Optional id of the user message that opened this turn. When set,
    * the chat-loop pairs it with the terminal assistant message id and
@@ -1120,6 +1190,7 @@ export async function runChatLoop(opts: ChatLoopOptions): Promise<ChatLoopResult
     verbosity,
     emphasisMarkdown,
     journalTimezone,
+    lastAssistantTimestamp,
     userMessageId,
     userName,
     userLocation,
@@ -1518,7 +1589,7 @@ export async function runChatLoop(opts: ChatLoopOptions): Promise<ChatLoopResult
     // appendix gets buried above a long `history`. Riding inside the
     // user-role content is the position the model is guaranteed to
     // attend to.
-    const datetimeTag = buildDatetimeTag(journalTimezone);
+    const datetimeTag = buildDatetimeTag(journalTimezone, lastAssistantTimestamp);
     const projectedHistory = tagLastUserMessage(history, datetimeTag, titleReminder);
     const requestMessages: VeniceMessage[] = [
       {
@@ -1998,3 +2069,12 @@ export async function runChatLoop(opts: ChatLoopOptions): Promise<ChatLoopResult
 
   return { finalText, roundsRun, stoppedByLimit, interrupted, conflictDetected, toolboxesEnabled };
 }
+
+// Test-only hook. The formatter is otherwise exercised only
+// indirectly, through the `<datetime>` tag's since_last_response
+// attribute; exporting it here lets the bucket thresholds (just-now /
+// few-minutes / hour / day / week / month / year boundaries) be
+// asserted directly rather than via a runChatLoop fixture per bucket.
+export const __test = {
+  formatRelativeDuration,
+};
diff --git a/src/lib/chat-prompt.ts b/src/lib/chat-prompt.ts
@@ -230,6 +230,9 @@ A <datetime local="..." utc="..." zone="..." /> tag may also appear outside the
 That tag is the platform telling you the actual current wall-clock time at the moment this request was built; the local attribute is ISO 8601 in the user's configured timezone, utc is ISO 8601 in UTC, and zone is the IANA name.
 Treat it as authoritative when answering questions about the current date, day of the week, time of day, or year.
 Do NOT rely on training-cutoff knowledge for "what year is it?" or "what day is today?"; read the tag.
+The tag may carry an additional since_last_response="..." attribute (e.g. "about 22 hours", "yesterday", "about 3 days") that tells you roughly how much wall-clock time has passed between your last reply on this thread and the user's current message.
+Use it to calibrate register: a fresh continuation within minutes means picking up mid-thought; "yesterday" or "about 3 days" means the user is reviving an older conversation and may benefit from a brief reorientation rather than a context-free continuation.
+Do NOT quote the elapsed string back at the user verbatim or thank them for the gap; treat it as silent context the same as the rest of the datetime tag. The attribute is absent on the opening turn of a thread (no prior assistant message to anchor against) - in that case there is simply no elapsed time to consider.
 `;
 
 // System reminder channel. Trailing `role: 'system'` messages were getting

diff --git a/src/screens/Chat.svelte b/src/screens/Chat.svelte
@@ -2416,6 +2416,27 @@
         .map((m) => toVeniceMessage(m, { visionSpec: ctx.tierSpec })),
     ];
 
+    // Resolves the anchor timestamp for the `<datetime>` tag's
+    // since_last_response attribute. Scans `messages` newest-first
+    // and yields the created_at of the first assistant row that is
+    // not in pendingDeleteSet - rows queued for regenerate-from-here
+    // are about to be replaced, so they must not count as "your last
+    // reply". Yields null when no such row exists (opening turn, or a
+    // regenerate that drops every prior assistant row); the chat-loop
+    // omits the attribute for null.
+    //
+    // Kept as a function and invoked at call time, rather than
+    // computed once, so an auto-retry after a 429 observes assistant
+    // rows the chat-loop persisted during earlier tool rounds (it
+    // persists mid-turn assistant rows before any final-text row).
+    // NOTE(review): on such a retry the anchor is a row from the
+    // current turn, so the attribute will likely read "just now" even
+    // on a revived thread - confirm that is the intended semantics.
+    const findLastAssistantTimestamp = (): string | null => {
+      let idx = messages.length;
+      while (idx-- > 0) {
+        const row = messages[idx];
+        const isLiveAssistantRow = row.role === 'assistant' && !pendingDeleteSet.has(row.id);
+        if (isLiveAssistantRow) return row.created_at;
+      }
+      return null;
+    };
+
     // Throttle streamingText updates to ~2Hz while the response
     // arrives. Every assignment drives <Markdown> to re-run marked
     // + DOMPurify + highlight.js over the full growing buffer, so
@@ -2511,6 +2532,7 @@
           userName: ctx.sendUserName,
           userLocation: ctx.sendUserLocation,
           journalTimezone: app.journalTimezone || null,
+          lastAssistantTimestamp: findLastAssistantTimestamp(),
           intuitionModelId: agentModel('intuition').id,
           intuitionMood: intuitionMoodArg,
           // Topic-boundary recall rides the same trigger machinery as