diff --git a/apps/vscode-e2e/src/suite/index.ts b/apps/vscode-e2e/src/suite/index.ts
index 4c63b70787..63d29ec28c 100644
--- a/apps/vscode-e2e/src/suite/index.ts
+++ b/apps/vscode-e2e/src/suite/index.ts
@@ -79,6 +79,16 @@ export async function run() {
 		throw new Error(`No test files found matching criteria: ${process.env.TEST_FILE || "all tests"}`)
 	}
 
+	// Run provider suites last (sorted by path within each group). Their teardown may
+	// leave per-mode profile pins pointing at non-default providers, which would affect
+	// tool suites that start tasks in specific modes and expect the default openrouter config.
+	testFiles.sort((a, b) => {
+		const aIsProvider = a.includes("/providers/")
+		const bIsProvider = b.includes("/providers/")
+		if (aIsProvider === bIsProvider) return a.localeCompare(b)
+		return aIsProvider ? 1 : -1
+	})
+
 	testFiles.forEach((testFile) => mocha.addFile(path.resolve(cwd, testFile)))
 
 	return new Promise<void>((resolve, reject) =>
diff --git a/apps/vscode-e2e/src/suite/providers/zai.test.ts b/apps/vscode-e2e/src/suite/providers/zai.test.ts
index 2f4a7ab7d7..69e938d582 100644
--- a/apps/vscode-e2e/src/suite/providers/zai.test.ts
+++ b/apps/vscode-e2e/src/suite/providers/zai.test.ts
@@ -219,6 +219,7 @@ suite("Z.ai GLM provider", function () {
 		})
 
 		await waitUntilCompleted({ api, taskId })
+		const capturedMaxTokens = requestCapture.maxTokens
 
 		const completionMessage = messages.find(
 			({ say, text }) => (say === "completion_result" || say === "text") && text?.trim() === "4",
@@ -228,10 +229,12 @@ suite("Z.ai GLM provider", function () {
 
 		// Verify max_tokens uses the restored default clamp (20% of context window)
 		// unless the user explicitly overrides it via modelMaxTokens.
+		// Snapshot immediately after waitUntilCompleted to avoid straggling async calls
+		// from this task overwriting requestCapture before the assertion runs.
 		assert.strictEqual(
-			requestCapture.maxTokens,
+			capturedMaxTokens,
 			40_000,
-			`max_tokens should default to the glm-5.1 clamp (40_000) but was ${requestCapture.maxTokens}`,
+			`max_tokens should default to the glm-5.1 clamp (40_000) but was ${capturedMaxTokens}`,
 		)
 	})
 
@@ -260,6 +263,7 @@ suite("Z.ai GLM provider", function () {
 		})
 
 		await waitUntilCompleted({ api, taskId })
+		const capturedMaxTokens = requestCapture.maxTokens
 
 		const completionMessage = messages.find(
 			({ say, text }) => (say === "completion_result" || say === "text") && text?.trim() === "4",
@@ -269,10 +273,12 @@ suite("Z.ai GLM provider", function () {
 
 		// Verify max_tokens uses the restored default clamp (20% of context window)
 		// unless the user explicitly overrides it via modelMaxTokens.
+		// Snapshot immediately after waitUntilCompleted to avoid straggling async calls
+		// from the prior test overwriting requestCapture before this assertion runs.
 		assert.strictEqual(
-			requestCapture.maxTokens,
+			capturedMaxTokens,
 			40_551,
-			`max_tokens should default to the glm-5-turbo clamp (40_551) but was ${requestCapture.maxTokens}`,
+			`max_tokens should default to the glm-5-turbo clamp (40_551) but was ${capturedMaxTokens}`,
 		)
 	})
 })