From 999ce645ddf04d72782d84d630eb91372b089207 Mon Sep 17 00:00:00 2001 From: Sagid Magomedov Date: Mon, 25 May 2026 12:05:27 +0100 Subject: [PATCH 1/7] fix: parse Gemma 4 "thought" reasoning tags alongside "think" tags Gemma 4 streams its reasoning inside a "thought" tag pair instead of the usual "think" tag pair. Without this the reasoning content leaks into chat text and the agent triggers a retry on the first turn. - TagMatcher: support multiple tag names (string | string[]) and track activeTagName so that a "think" block is never closed by a "thought" closing tag (and vice-versa). - base-openai-compatible-provider and openai handler: match both tags. - Tests: parsing, cross-tag isolation, and invariants. --- .../base-openai-compatible-provider.spec.ts | 69 +++++++++++++++++++ .../base-openai-compatible-provider.ts | 2 +- src/api/providers/openai.ts | 2 +- src/utils/tag-matcher.ts | 60 +++++++++++----- 4 files changed, 114 insertions(+), 19 deletions(-) diff --git a/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts b/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts index 847aa6e4dc..5b96fc2d17 100644 --- a/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts +++ b/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts @@ -97,6 +97,75 @@ describe("BaseOpenAiCompatibleProvider", () => { ]) }) + it("should handle reasoning tags () from stream", async () => { + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "Deep thought" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: " here" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "Result: 42" } }] }, + }) + .mockResolvedValueOnce({ done: true }), + }), + } + }) + const stream = handler.createMessage("system prompt", []) + const chunks = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + expect(chunks).toEqual([ + { type: "reasoning", 
text: "Deep thought" }, + { type: "reasoning", text: " here" }, + { type: "text", text: "Result: 42" }, + ]) + }) + + it("should not close tag with tag", async () => { + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "Thinking" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: " but closing with wrong tag" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: " still thinking" } }] }, + }) + .mockResolvedValueOnce({ done: true }), + }), + } + }) + const stream = handler.createMessage("system prompt", []) + const chunks = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + // The tag should be treated as text since it doesn't match the active tag + expect(chunks).toEqual([ + { type: "reasoning", text: "Thinking" }, + { type: "reasoning", text: " but closing with wrong tag" }, + { type: "reasoning", text: " still thinking" }, + ]) + }) + it("should handle complete tag in a single chunk", async () => { mockCreate.mockImplementationOnce(() => { return { diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts index 28c812660f..b6094f9cc4 100644 --- a/src/api/providers/base-openai-compatible-provider.ts +++ b/src/api/providers/base-openai-compatible-provider.ts @@ -118,7 +118,7 @@ export abstract class BaseOpenAiCompatibleProvider const stream = await this.createStream(systemPrompt, messages, metadata) const matcher = new TagMatcher( - "think", + ["think", "thought"], (chunk) => ({ type: chunk.matched ? 
"reasoning" : "text", diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index abef612d88..c8f17dac3e 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -184,7 +184,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } const matcher = new TagMatcher( - "think", + ["think", "thought"], (chunk) => ({ type: chunk.matched ? "reasoning" : "text", diff --git a/src/utils/tag-matcher.ts b/src/utils/tag-matcher.ts index 38d99a2904..4d2ecd7e28 100644 --- a/src/utils/tag-matcher.ts +++ b/src/utils/tag-matcher.ts @@ -17,11 +17,17 @@ export class TagMatcher { state: "TEXT" | "TAG_OPEN" | "TAG_CLOSE" = "TEXT" depth = 0 pointer = 0 + private readonly tagNames: string[] + private activeTagName?: string + private candidates: { name: string; index: number }[] = [] + constructor( - readonly tagName: string, + tagName: string | string[], readonly transform?: (chunks: TagMatcherResult) => Result, readonly position = 0, - ) {} + ) { + this.tagNames = Array.isArray(tagName) ? 
tagName : [tagName] + } private collect() { if (!this.cached.length) { return @@ -57,38 +63,56 @@ export class TagMatcher { if (char === "<" && (this.pointer <= this.position + 1 || this.matched)) { this.state = "TAG_OPEN" this.index = 0 + this.candidates = this.tagNames.map((name) => ({ name, index: 0 })) } else { this.collect() } } else if (this.state === "TAG_OPEN") { - if (char === ">" && this.index === this.tagName.length) { - this.state = "TEXT" - if (!this.matched) { - this.cached = [] + if (char === ">") { + const matched = this.candidates.find((c) => c.index === c.name.length) + if (matched) { + this.state = "TEXT" + this.activeTagName = matched.name + if (!this.matched) { + this.cached = [] + } + this.depth++ + this.matched = true + continue } - this.depth++ - this.matched = true - } else if (this.index === 0 && char === "/") { + } else if (this.candidates.every((c) => c.index === 0) && char === "/") { this.state = "TAG_CLOSE" - } else if (char === " " && (this.index === 0 || this.index === this.tagName.length)) { + this.index = 0 continue - } else if (this.tagName[this.index] === char) { - this.index++ + } else if (char === " ") { + const remaining = this.candidates.filter((c) => c.index === 0 || c.index === c.name.length) + if (remaining.length === this.candidates.length) { + continue + } + this.candidates = remaining } else { - this.state = "TEXT" - this.collect() + this.candidates = this.candidates.filter((c) => c.name[c.index] === char) + for (const c of this.candidates) { + c.index++ + } + if (this.candidates.length === 0) { + this.state = "TEXT" + this.collect() + } } } else if (this.state === "TAG_CLOSE") { - if (char === ">" && this.index === this.tagName.length) { + const tagName = this.activeTagName || this.tagNames[0] + if (char === ">" && this.index === tagName.length) { this.state = "TEXT" this.depth-- this.matched = this.depth > 0 if (!this.matched) { + this.activeTagName = undefined this.cached = [] } - } else if (char === " " && 
(this.index === 0 || this.index === this.tagName.length)) { + } else if (char === " " && (this.index === 0 || this.index === tagName.length)) { continue - } else if (this.tagName[this.index] === char) { + } else if (tagName[this.index] === char) { this.index++ } else { this.state = "TEXT" @@ -102,6 +126,8 @@ export class TagMatcher { this._update(chunk) } this.collect() + this.candidates = [] + this.activeTagName = undefined return this.pop() } update(chunk: string) { From ae1a395e176efeaf816f0f85406505de0bc478e6 Mon Sep 17 00:00:00 2001 From: Sagid Magomedov Date: Tue, 26 May 2026 14:14:56 +0100 Subject: [PATCH 2/7] fix: support nested reasoning tags in TagMatcher and add comprehensive streaming tests --- src/api/providers/__tests__/openai.spec.ts | 354 +++++++++++++++++++++ src/utils/tag-matcher.ts | 23 +- 2 files changed, 371 insertions(+), 6 deletions(-) diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index 3c006f8318..801f70f97a 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -636,6 +636,360 @@ describe("OpenAiHandler", () => { const callArgs = mockCreate.mock.calls[0][0] expect(callArgs.max_completion_tokens).toBe(4096) }) + + describe("TagMatcher reasoning tags", () => { + it("should handle tags from stream", async () => { + mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "Let me think" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: " about this" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "The answer is 42" } }] }, + }) + .mockResolvedValueOnce({ done: true }), + }), + })) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + 
chunks.push(chunk) + } + + expect(chunks).toEqual([ + { type: "reasoning", text: "Let me think" }, + { type: "reasoning", text: " about this" }, + { type: "text", text: "The answer is 42" }, + ]) + }) + + it("should handle tags from stream", async () => { + mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "Deep thought" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: " here" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "Result: 42" } }] }, + }) + .mockResolvedValueOnce({ done: true }), + }), + })) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + expect(chunks).toEqual([ + { type: "reasoning", text: "Deep thought" }, + { type: "reasoning", text: " here" }, + { type: "text", text: "Result: 42" }, + ]) + }) + + it("should not close tag with tag", async () => { + mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "Thinking" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: " but closing with wrong tag" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: " still thinking" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "final text" } }] }, + }) + .mockResolvedValueOnce({ done: true }), + }), + })) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // The tag should not match the active tag, so the closing + // tag is treated as text. 
The " still thinking" stays reasoning since + // was never closed with . + expect(chunks).toEqual([ + { type: "reasoning", text: "Thinking" }, + { type: "reasoning", text: " but closing with wrong tag" }, + { type: "reasoning", text: " still thinking" }, + { type: "text", text: "final text" }, + ]) + }) + + it("should handle text without any tags", async () => { + mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "Just regular text" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: " without reasoning" } }] }, + }) + .mockResolvedValueOnce({ done: true }), + }), + })) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + expect(chunks).toEqual([ + { type: "text", text: "Just regular text" }, + { type: "text", text: " without reasoning" }, + ]) + }) + + it("should handle tags that start at beginning of stream", async () => { + mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "reasoning" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: " content" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: " normal text" } }] }, + }) + .mockResolvedValueOnce({ done: true }), + }), + })) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + expect(chunks).toEqual([ + { type: "reasoning", text: "reasoning" }, + { type: "reasoning", text: " content" }, + { type: "text", text: " normal text" }, + ]) + }) + + it("should handle incomplete tag at end of stream", async () => { + 
mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "Incomplete thought" } }] }, + }) + .mockResolvedValueOnce({ done: true }), + }), + })) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // TagMatcher should flush remaining reasoning content on final() + expect(chunks.length).toBeGreaterThan(0) + expect( + chunks.some( + (c) => (c.type === "text" || c.type === "reasoning") && c.text.includes("Incomplete thought"), + ), + ).toBe(true) + }) + + it("should handle complete tag in a single chunk", async () => { + mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "text before " } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "Complete thought" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: " text after" } }] }, + }) + .mockResolvedValueOnce({ done: true }), + }), + })) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // The TagMatcher processes the whole chunk character by character, + // so the complete tag is detected and yields reasoning text + expect(chunks.length).toBeGreaterThan(0) + expect(chunks[0]).toEqual({ type: "text", text: "text before " }) + }) + + it("should handle nested mixed tags with correct closure matching", async () => { + mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "outer" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: 
[{ delta: { content: "inner" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: " middle" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "final text" } }] }, + }) + .mockResolvedValueOnce({ done: true }), + }), + })) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // With the tag stack fix, closes inner tag, + // and correctly closes the outer tag. + // inner content inside is reasoning, middle is still reasoning under + expect(chunks).toEqual([ + { type: "reasoning", text: "outer" }, + { type: "reasoning", text: "inner" }, + { type: "reasoning", text: " middle" }, + { type: "text", text: "final text" }, + ]) + }) + + it("should handle nested tags with correct stack unwinding", async () => { + mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "outer" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "inner" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: " middle" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "final text" } }] }, + }) + .mockResolvedValueOnce({ done: true }), + }), + })) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // With the tag stack fix, closes inner tag, + // and correctly closes the outer tag. 
+ // inner content inside is reasoning, middle is still reasoning under + expect(chunks).toEqual([ + { type: "reasoning", text: "outer" }, + { type: "reasoning", text: "inner" }, + { type: "reasoning", text: " middle" }, + { type: "text", text: "final text" }, + ]) + }) + + it("should handle reasoning_content alongside tag matching", async () => { + mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { reasoning_content: "native reasoning" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "tag based" } }] }, + }) + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: " final output" } }] }, + }) + .mockResolvedValueOnce({ done: true }), + }), + })) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + expect(chunks).toEqual([ + { type: "reasoning", text: "native reasoning" }, + { type: "reasoning", text: "tag based" }, + { type: "text", text: " final output" }, + ]) + }) + }) }) describe("error handling", () => { diff --git a/src/utils/tag-matcher.ts b/src/utils/tag-matcher.ts index 4d2ecd7e28..2f30f8e592 100644 --- a/src/utils/tag-matcher.ts +++ b/src/utils/tag-matcher.ts @@ -18,7 +18,10 @@ export class TagMatcher { depth = 0 pointer = 0 private readonly tagNames: string[] - private activeTagName?: string + private activeTagNames: string[] = [] + private inCode = false + private codeFence = 0 + private tickRun = 0 private candidates: { name: string; index: number }[] = [] constructor( @@ -63,7 +66,12 @@ export class TagMatcher { if (char === "<" && (this.pointer <= this.position + 1 || this.matched)) { this.state = "TAG_OPEN" this.index = 0 - this.candidates = this.tagNames.map((name) => ({ name, index: 0 })) + if (this.depth === 0) { + this.candidates = 
this.tagNames.map((name) => ({ name, index: 0 })) + } else { + const active = this.activeTagNames.at(-1) + this.candidates = active ? [{ name: active, index: 0 }] : [] + } } else { this.collect() } @@ -72,7 +80,7 @@ export class TagMatcher { const matched = this.candidates.find((c) => c.index === c.name.length) if (matched) { this.state = "TEXT" - this.activeTagName = matched.name + this.activeTagNames.push(matched.name) if (!this.matched) { this.cached = [] } @@ -101,13 +109,13 @@ export class TagMatcher { } } } else if (this.state === "TAG_CLOSE") { - const tagName = this.activeTagName || this.tagNames[0] + const tagName = this.activeTagNames.at(-1) || this.tagNames[0] if (char === ">" && this.index === tagName.length) { this.state = "TEXT" this.depth-- + this.activeTagNames.pop() this.matched = this.depth > 0 if (!this.matched) { - this.activeTagName = undefined this.cached = [] } } else if (char === " " && (this.index === 0 || this.index === tagName.length)) { @@ -127,11 +135,14 @@ export class TagMatcher { } this.collect() this.candidates = [] - this.activeTagName = undefined + this.activeTagNames = [] return this.pop() } update(chunk: string) { this._update(chunk) + if (this.state === "TEXT") { + this.collect() + } return this.pop() } } From 7d66f896b74223b3153fc8dd898c48005104a16f Mon Sep 17 00:00:00 2001 From: Sagid Magomedov Date: Tue, 26 May 2026 15:43:38 +0100 Subject: [PATCH 3/7] test(tag-matcher): add tests for unmatched closing tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two regression tests that verify depth never goes negative: 1. stray closer with no opener: a closing reasoning tag followed by "finaltext" → "finaltext" is emitted as text 2. duplicate closer after a proper close: an opened and closed reasoning block containing "thinking", then a second closing tag, then "finaltext" → "thinking" is reasoning and "finaltext" is text Both cases ensure we only decrement depth and pop activeTagNames when depth > 0, preventing underflow and treating the extra closing tag as plain text. 
--- src/api/providers/__tests__/openai.spec.ts | 49 ++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index 801f70f97a..6105f48519 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -777,6 +777,55 @@ describe("OpenAiHandler", () => { ]) }) + it("should treat stray closing tag as plain text when no tag is open", async () => { + mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { choices: [{ delta: { content: "finaltext" } }] }, + }) + .mockResolvedValueOnce({ done: true }), + }), + })) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + expect(chunks).toEqual([{ type: "text", text: "finaltext" }]) + }) + + it("should treat extra closing tag after a closed block as plain text", async () => { + mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: () => ({ + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { + choices: [{ delta: { content: "thinkingfinaltext" } }], + }, + }) + .mockResolvedValueOnce({ done: true }), + }), + })) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + expect(chunks).toEqual([ + { type: "reasoning", text: "thinking" }, + { type: "text", text: "finaltext" }, + ]) + }) + it("should handle tags that start at beginning of stream", async () => { mockCreate.mockImplementationOnce(() => ({ [Symbol.asyncIterator]: () => ({ From f24b964a517bc078cf101303695d2175f73824dc Mon Sep 17 00:00:00 2001 From: Sagid M Date: Fri, 26 Jun 2026 01:51:28 +0300 Subject: [PATCH 4/7] Apply suggestion from @edelauna Co-authored-by: edelauna 
<54631123+edelauna@users.noreply.github.com> --- src/utils/tag-matcher.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/utils/tag-matcher.ts b/src/utils/tag-matcher.ts index 2f30f8e592..bf4791fdd4 100644 --- a/src/utils/tag-matcher.ts +++ b/src/utils/tag-matcher.ts @@ -19,9 +19,6 @@ export class TagMatcher { pointer = 0 private readonly tagNames: string[] private activeTagNames: string[] = [] - private inCode = false - private codeFence = 0 - private tickRun = 0 private candidates: { name: string; index: number }[] = [] constructor( From 63c1066bb300cbcc1a83e63e99a5feb86dbfaa4d Mon Sep 17 00:00:00 2001 From: Elliott de Launay Date: Fri, 26 Jun 2026 00:52:49 +0000 Subject: [PATCH 5/7] refactor(providers): updating tag matcher --- src/api/providers/lm-studio.ts | 2 +- src/api/providers/native-ollama.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts index d04bd157c7..b109518557 100644 --- a/src/api/providers/lm-studio.ts +++ b/src/api/providers/lm-studio.ts @@ -104,7 +104,7 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan } const matcher = new TagMatcher( - "think", + ["think", "thought"], (chunk) => ({ type: chunk.matched ? "reasoning" : "text", diff --git a/src/api/providers/native-ollama.ts b/src/api/providers/native-ollama.ts index 99c1dc03cf..7ee91282a4 100644 --- a/src/api/providers/native-ollama.ts +++ b/src/api/providers/native-ollama.ts @@ -215,7 +215,7 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio ] const matcher = new TagMatcher( - "think", + ["think", "thought"], (chunk) => ({ type: chunk.matched ? 
"reasoning" : "text", From c7e3a5a2bd748c4b938c0cc63b11cf53ff00edca Mon Sep 17 00:00:00 2001 From: Elliott de Launay Date: Fri, 26 Jun 2026 01:53:55 +0000 Subject: [PATCH 6/7] test(TagMatcher): adding tests for new logic --- src/utils/__tests__/tag-matcher.spec.ts | 74 +++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 src/utils/__tests__/tag-matcher.spec.ts diff --git a/src/utils/__tests__/tag-matcher.spec.ts b/src/utils/__tests__/tag-matcher.spec.ts new file mode 100644 index 0000000000..fd2ba19f63 --- /dev/null +++ b/src/utils/__tests__/tag-matcher.spec.ts @@ -0,0 +1,74 @@ +// npx vitest utils/__tests__/tag-matcher.spec.ts + +import { TagMatcher } from "../tag-matcher" + +describe("TagMatcher", () => { + describe("collect() chunk merging (line 52)", () => { + it("merges consecutive same-type chars into one chunk within a single call", () => { + // Two text chars in one update() → both hit collect() with matched=false + // second char finds last chunk same type → last.data += char (line 52) + const matcher = new TagMatcher("think") + const result = matcher.update("ab") + expect(result).toEqual([{ matched: false, data: "ab" }]) + }) + + it("merges consecutive reasoning chars within a single call", () => { + const matcher = new TagMatcher("think") + matcher.update("") + const result = matcher.update("ab") + expect(result).toEqual([{ matched: true, data: "ab" }]) + }) + }) + + describe("final() with a chunk argument (line 131)", () => { + it("processes a chunk passed directly to final()", () => { + // Call final() with a chunk instead of update() — exercises line 131 + const matcher = new TagMatcher("think") + const result = matcher.final("hello") + expect(result).toEqual([{ matched: false, data: "hello" }]) + }) + + it("processes a closing tag passed to final()", () => { + const matcher = new TagMatcher("think") + // Don't use update() — keeps reasoning in the buffer so final() flushes it + const result = matcher.final("reasoning") + 
expect(result.some((r) => r.matched && r.data === "reasoning")).toBe(true) + }) + }) + + describe("space handling in TAG_OPEN (lines 93-97)", () => { + it("tolerates a space before tag name has started (line 95: all candidates at index 0)", () => { + // "< think>" — space arrives when all candidates are at index 0 + // hits line 95 (continue), candidates survive, 't' then matches normally + const matcher = new TagMatcher("think") + const result = matcher.final("< think>content") + expect(result.some((r) => r.matched && r.data === "content")).toBe(true) + }) + + it("drops mid-match candidates on a space (line 97)", () => { + // "" — space arrives mid-match (index > 0, index < name.length) + // those candidates are dropped, tag is not opened + const matcher = new TagMatcher("think") + const result = matcher.final("content") + expect(result.every((r) => !r.matched)).toBe(true) + }) + }) + + describe("space handling in TAG_CLOSE (line 119)", () => { + it("tolerates a trailing space before > in closing tag ()", () => { + // space at index === tagName.length hits line 119 (continue) + const matcher = new TagMatcher("think") + const result = matcher.final("reasoningafter") + expect(result.some((r) => r.matched && r.data === "reasoning")).toBe(true) + expect(result.some((r) => !r.matched && r.data === "after")).toBe(true) + }) + + it("tolerates a leading space after )", () => { + // space at index === 0 hits line 119 (continue) + const matcher = new TagMatcher("think") + const result = matcher.final("reasoningafter") + expect(result.some((r) => r.matched && r.data === "reasoning")).toBe(true) + expect(result.some((r) => !r.matched && r.data === "after")).toBe(true) + }) + }) +}) From 3711c0d3300f7ee0884d9db4cfb1e87c8f95d8df Mon Sep 17 00:00:00 2001 From: Elliott de Launay Date: Fri, 26 Jun 2026 02:52:30 +0000 Subject: [PATCH 7/7] test(tag-matcher): consolidate reasoning tag tests into tag-matcher.spec.ts --- .../base-openai-compatible-provider.spec.ts | 9 +- 
src/api/providers/__tests__/openai.spec.ts | 234 ------------------ src/utils/__tests__/tag-matcher.spec.ts | 80 ++++++ src/utils/tag-matcher.ts | 8 +- 4 files changed, 87 insertions(+), 244 deletions(-) diff --git a/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts b/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts index 5b96fc2d17..cbb2a91333 100644 --- a/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts +++ b/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts @@ -222,13 +222,8 @@ describe("BaseOpenAiCompatibleProvider", () => { chunks.push(chunk) } - // TagMatcher should handle incomplete tags and flush remaining content - expect(chunks.length).toBeGreaterThan(0) - expect( - chunks.some( - (c) => (c.type === "text" || c.type === "reasoning") && c.text.includes("Incomplete thought"), - ), - ).toBe(true) + // TagMatcher should flush incomplete reasoning content on stream end + expect(chunks).toContainEqual({ type: "reasoning", text: "Incomplete thought" }) }) it("should handle text without any tags", async () => { diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index 6105f48519..708a131957 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -638,145 +638,6 @@ describe("OpenAiHandler", () => { }) describe("TagMatcher reasoning tags", () => { - it("should handle tags from stream", async () => { - mockCreate.mockImplementationOnce(() => ({ - [Symbol.asyncIterator]: () => ({ - next: vi - .fn() - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: "Let me think" } }] }, - }) - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: " about this" } }] }, - }) - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: "The answer is 42" } }] }, - }) - .mockResolvedValueOnce({ done: true }), - }), - })) - - const 
stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - expect(chunks).toEqual([ - { type: "reasoning", text: "Let me think" }, - { type: "reasoning", text: " about this" }, - { type: "text", text: "The answer is 42" }, - ]) - }) - - it("should handle tags from stream", async () => { - mockCreate.mockImplementationOnce(() => ({ - [Symbol.asyncIterator]: () => ({ - next: vi - .fn() - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: "Deep thought" } }] }, - }) - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: " here" } }] }, - }) - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: "Result: 42" } }] }, - }) - .mockResolvedValueOnce({ done: true }), - }), - })) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - expect(chunks).toEqual([ - { type: "reasoning", text: "Deep thought" }, - { type: "reasoning", text: " here" }, - { type: "text", text: "Result: 42" }, - ]) - }) - - it("should not close tag with tag", async () => { - mockCreate.mockImplementationOnce(() => ({ - [Symbol.asyncIterator]: () => ({ - next: vi - .fn() - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: "Thinking" } }] }, - }) - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: " but closing with wrong tag" } }] }, - }) - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: " still thinking" } }] }, - }) - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: "final text" } }] }, - }) - .mockResolvedValueOnce({ done: true }), - }), - })) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - 
// The tag should not match the active tag, so the closing - // tag is treated as text. The " still thinking" stays reasoning since - // was never closed with . - expect(chunks).toEqual([ - { type: "reasoning", text: "Thinking" }, - { type: "reasoning", text: " but closing with wrong tag" }, - { type: "reasoning", text: " still thinking" }, - { type: "text", text: "final text" }, - ]) - }) - - it("should handle text without any tags", async () => { - mockCreate.mockImplementationOnce(() => ({ - [Symbol.asyncIterator]: () => ({ - next: vi - .fn() - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: "Just regular text" } }] }, - }) - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: " without reasoning" } }] }, - }) - .mockResolvedValueOnce({ done: true }), - }), - })) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - expect(chunks).toEqual([ - { type: "text", text: "Just regular text" }, - { type: "text", text: " without reasoning" }, - ]) - }) - it("should treat stray closing tag as plain text when no tag is open", async () => { mockCreate.mockImplementationOnce(() => ({ [Symbol.asyncIterator]: () => ({ @@ -826,101 +687,6 @@ describe("OpenAiHandler", () => { ]) }) - it("should handle tags that start at beginning of stream", async () => { - mockCreate.mockImplementationOnce(() => ({ - [Symbol.asyncIterator]: () => ({ - next: vi - .fn() - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: "reasoning" } }] }, - }) - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: " content" } }] }, - }) - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: " normal text" } }] }, - }) - .mockResolvedValueOnce({ done: true }), - }), - })) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] 
- for await (const chunk of stream) { - chunks.push(chunk) - } - - expect(chunks).toEqual([ - { type: "reasoning", text: "reasoning" }, - { type: "reasoning", text: " content" }, - { type: "text", text: " normal text" }, - ]) - }) - - it("should handle incomplete tag at end of stream", async () => { - mockCreate.mockImplementationOnce(() => ({ - [Symbol.asyncIterator]: () => ({ - next: vi - .fn() - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: "Incomplete thought" } }] }, - }) - .mockResolvedValueOnce({ done: true }), - }), - })) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // TagMatcher should flush remaining reasoning content on final() - expect(chunks.length).toBeGreaterThan(0) - expect( - chunks.some( - (c) => (c.type === "text" || c.type === "reasoning") && c.text.includes("Incomplete thought"), - ), - ).toBe(true) - }) - - it("should handle complete tag in a single chunk", async () => { - mockCreate.mockImplementationOnce(() => ({ - [Symbol.asyncIterator]: () => ({ - next: vi - .fn() - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: "text before " } }] }, - }) - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: "Complete thought" } }] }, - }) - .mockResolvedValueOnce({ - done: false, - value: { choices: [{ delta: { content: " text after" } }] }, - }) - .mockResolvedValueOnce({ done: true }), - }), - })) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // The TagMatcher processes the whole chunk character by character, - // so the complete tag is detected and yields reasoning text - expect(chunks.length).toBeGreaterThan(0) - expect(chunks[0]).toEqual({ type: "text", text: "text before " }) - }) - it("should handle nested mixed tags with 
correct closure matching", async () => { mockCreate.mockImplementationOnce(() => ({ [Symbol.asyncIterator]: () => ({ diff --git a/src/utils/__tests__/tag-matcher.spec.ts b/src/utils/__tests__/tag-matcher.spec.ts index fd2ba19f63..114eaccce7 100644 --- a/src/utils/__tests__/tag-matcher.spec.ts +++ b/src/utils/__tests__/tag-matcher.spec.ts @@ -54,6 +54,86 @@ describe("TagMatcher", () => { }) }) + describe("multi-tag constructor (string[])", () => { + it("opens and closes when constructed with array", () => { + const matcher = new TagMatcher(["think", "thought"]) + const result = matcher.final("deep reasoningdone") + expect(result.some((r) => r.matched && r.data === "deep reasoning")).toBe(true) + expect(result.some((r) => !r.matched && r.data === "done")).toBe(true) + }) + + it("opens and closes when constructed with array", () => { + const matcher = new TagMatcher(["think", "thought"]) + const result = matcher.final("thinkingdone") + expect(result.some((r) => r.matched && r.data === "thinking")).toBe(true) + expect(result.some((r) => !r.matched && r.data === "done")).toBe(true) + }) + + it(" open is not closed by (cross-tag isolation)", () => { + const matcher = new TagMatcher(["think", "thought"]) + const result = matcher.final("reasoningstill reasoningdone") + // must be treated as text since active tag is + expect(result.some((r) => r.matched && r.data.includes(""))).toBe(true) + expect(result.some((r) => !r.matched && r.data === "done")).toBe(true) + }) + + it(" open is not closed by (inverse cross-tag isolation)", () => { + const matcher = new TagMatcher(["think", "thought"]) + const result = matcher.final("reasoningstill reasoningdone") + // must be treated as text since active tag is + expect(result.some((r) => r.matched && r.data.includes(""))).toBe(true) + expect(result.some((r) => !r.matched && r.data === "done")).toBe(true) + }) + }) + + describe("chunk split at mid-tag-name boundary", () => { + it("correctly opens tag split across two update() calls", () 
=> { + const matcher = new TagMatcher("think") + const first = matcher.update("content") + expect(second.some((r) => r.matched && r.data === "content")).toBe(true) + }) + }) + + describe("unmatched > in TAG_OPEN falls back to TEXT", () => { + it("treats as plain text when xyz is not a configured tag name", () => { + const matcher = new TagMatcher("think") + const result = matcher.final("content") + expect(result.every((r) => !r.matched)).toBe(true) + }) + + it("treats stray closing tag as plain text when no tag is open", () => { + const matcher = new TagMatcher(["think", "thought"]) + const result = matcher.final("finaltext") + expect(result).toEqual([{ matched: false, data: "finaltext" }]) + }) + + it("treats extra closing tag after a closed block as plain text", () => { + const matcher = new TagMatcher(["think", "thought"]) + const result = matcher.final("thinkingfinaltext") + expect(result.some((r) => r.matched && r.data === "thinking")).toBe(true) + expect(result.some((r) => !r.matched && r.data === "finaltext")).toBe(true) + }) + }) + + describe("nested tags", () => { + it("treats inner as text when outer is active", () => { + const matcher = new TagMatcher(["think", "thought"]) + const result = matcher.final("outerinner middlefinal") + expect(result.some((r) => r.matched && r.data.includes("inner"))).toBe(true) + expect(result.some((r) => !r.matched && r.data === "final")).toBe(true) + }) + + it("correctly unwinds nested same-name tags", () => { + const matcher = new TagMatcher(["think", "thought"]) + const result = matcher.final("outerinner middlefinal") + expect(result.some((r) => r.matched && r.data.includes("inner"))).toBe(true) + expect(result.some((r) => !r.matched && r.data === "final")).toBe(true) + }) + }) + describe("space handling in TAG_CLOSE (line 119)", () => { it("tolerates a trailing space before > in closing tag ()", () => { // space at index === tagName.length hits line 119 (continue) diff --git a/src/utils/tag-matcher.ts 
b/src/utils/tag-matcher.ts index bf4791fdd4..f10bf2f1ee 100644 --- a/src/utils/tag-matcher.ts +++ b/src/utils/tag-matcher.ts @@ -22,7 +22,7 @@ export class TagMatcher { private candidates: { name: string; index: number }[] = [] constructor( - tagName: string | string[], + tagName: string | [string, ...string[]], readonly transform?: (chunks: TagMatcherResult) => Result, readonly position = 0, ) { @@ -62,7 +62,6 @@ export class TagMatcher { if (this.state === "TEXT") { if (char === "<" && (this.pointer <= this.position + 1 || this.matched)) { this.state = "TAG_OPEN" - this.index = 0 if (this.depth === 0) { this.candidates = this.tagNames.map((name) => ({ name, index: 0 })) } else { @@ -84,6 +83,9 @@ export class TagMatcher { this.depth++ this.matched = true continue + } else { + this.state = "TEXT" + this.collect() } } else if (this.candidates.every((c) => c.index === 0) && char === "/") { this.state = "TAG_CLOSE" @@ -106,7 +108,7 @@ export class TagMatcher { } } } else if (this.state === "TAG_CLOSE") { - const tagName = this.activeTagNames.at(-1) || this.tagNames[0] + const tagName = this.activeTagNames.at(-1) ?? this.tagNames[0] if (char === ">" && this.index === tagName.length) { this.state = "TEXT" this.depth--