Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CLI/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
"node": ">=18.0.0"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.32.0",
"@anthropic-ai/sdk": "^0.92.0",
"ajv": "^8.17.1",
"chalk": "^5.3.0",
"commander": "^12.1.0",
Expand Down
77 changes: 49 additions & 28 deletions CLI/src/core/anthropic.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
/**
* Anthropic API Client Module
*
* Wrapper around the official Anthropic SDK with retry logic and streaming support.
* Wrapper around the official Anthropic SDK with retry logic, streaming support,
* and prompt caching for cost-efficient repeated system prompt usage.
*/

import Anthropic from '@anthropic-ai/sdk';

Check failure on line 8 in CLI/src/core/anthropic.ts

View workflow job for this annotation

GitHub Actions / Test

src/core/anthropic.test.ts

Error: Cannot find package '@anthropic-ai/sdk' imported from '/home/runner/work/AppFactory/AppFactory/CLI/src/core/anthropic.ts' ❯ src/core/anthropic.ts:8:1 ❯ src/core/anthropic.test.ts:7:1 ⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯ Serialized Error: { code: 'ERR_MODULE_NOT_FOUND' }
import { logger } from './logging.js';

// Configuration from environment
Expand All @@ -16,7 +17,7 @@
}

// Default configuration values
const DEFAULT_MODEL = 'claude-sonnet-4-20250514';
const DEFAULT_MODEL = 'claude-sonnet-4-6';
const DEFAULT_MAX_TOKENS = 16000;
const DEFAULT_TEMPERATURE = 0.3;
const MAX_RETRIES = 3;
Expand Down Expand Up @@ -77,6 +78,40 @@
return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Wrap a system prompt string in the block-array form of the `system`
 * request parameter.
 *
 * The single text block carries an ephemeral `cache_control` marker so that
 * repeated pipeline calls sending the same system prompt can reuse the
 * cached prompt instead of paying for it again on every request.
 *
 * @param systemPrompt - The system instructions to send with the request.
 * @returns A one-element array containing the cache-marked text block.
 */
function buildSystemParam(
  systemPrompt: string
): Anthropic.Messages.TextBlockParam[] {
  const cachedBlock: Anthropic.Messages.TextBlockParam = {
    type: 'text',
    text: systemPrompt,
    cache_control: { type: 'ephemeral' },
  };

  return [cachedBlock];
}

/**
 * Decide whether a failed API call should trigger a retry attempt.
 *
 * Retryable failures are rate limiting (HTTP 429) and server-side failures
 * (HTTP 5xx, e.g. 503 unavailable or 529 overloaded). Classification order:
 *
 * 1. SDK error classes (`RateLimitError`, `InternalServerError`).
 * 2. A numeric `status` property on the error, when present. The status is
 *    authoritative, so a 4xx error whose message merely mentions "rate" or
 *    "429" is not retried.
 * 3. Message text, as a last resort for errors with no status at all.
 *
 * @param err - The value caught from the failed request; may be any shape.
 * @returns `true` when the caller should back off and retry.
 */
function isRetryableError(err: unknown): boolean {
  if (err instanceof Anthropic.RateLimitError) return true;
  // InternalServerError is the SDK class used for 5xx responses
  // (including 503 and 529 overloaded).
  if (err instanceof Anthropic.InternalServerError) return true;

  // Error-like objects that expose an HTTP status are judged by it alone.
  const status =
    typeof err === 'object' && err !== null
      ? (err as { status?: unknown }).status
      : undefined;
  if (typeof status === 'number') {
    return status === 429 || status >= 500;
  }

  // Fallback for non-SDK error shapes: match on the message text.
  // - "rate" must be its own word or part of "rate limit"/"rate_limit"/
  //   "ratelimit", so words such as "generate" do not match.
  // - Status codes must not be embedded in a longer run of digits.
  // - Matching is case-insensitive so "Overloaded" is recognised.
  const retryableMessage =
    /\brate(?:[\s_-]?limit|(?![a-z]))|(?<!\d)(?:429|503|529)(?!\d)|overloaded/i;
  const msg = err instanceof Error ? err.message : String(err);
  return retryableMessage.test(msg);
}

/**
* Extract JSON from a Claude response that may contain markdown code blocks
*/
Expand Down Expand Up @@ -133,7 +168,7 @@
model: fullConfig.model,
max_tokens: fullConfig.maxTokens,
temperature: fullConfig.temperature,
...(systemPrompt && { system: systemPrompt }),
...(systemPrompt && { system: buildSystemParam(systemPrompt) }),
messages: [{ role: 'user', content: prompt }],
});

Expand All @@ -149,28 +184,17 @@
return responseText;
} catch (err) {
lastError = err as Error;
const errorMessage = lastError.message || String(err);

// Check for rate limiting
if (errorMessage.includes('rate') || errorMessage.includes('429')) {
if (isRetryableError(err)) {
logger.warn(
`Rate limited, waiting ${RETRY_DELAY_MS * attempt}ms before retry ${attempt}/${MAX_RETRIES}`
);
await sleep(RETRY_DELAY_MS * attempt);
continue;
}

// Check for overloaded
if (errorMessage.includes('overloaded') || errorMessage.includes('503')) {
logger.warn(
`API overloaded, waiting ${RETRY_DELAY_MS * attempt}ms before retry ${attempt}/${MAX_RETRIES}`
`API unavailable, waiting ${RETRY_DELAY_MS * attempt}ms before retry ${attempt}/${MAX_RETRIES}`
);
await sleep(RETRY_DELAY_MS * attempt);
continue;
}

// Other errors - don't retry
logger.apiError(errorMessage);
logger.apiError(lastError.message || String(err));
throw err;
}
}
Expand Down Expand Up @@ -225,21 +249,18 @@
model: fullConfig.model,
max_tokens: fullConfig.maxTokens,
temperature: fullConfig.temperature,
...(systemPrompt && { system: systemPrompt }),
...(systemPrompt && { system: buildSystemParam(systemPrompt) }),
messages: [{ role: 'user', content: prompt }],
});

for await (const event of stream) {
if (event.type === 'content_block_delta') {
const delta = event.delta;
if ('text' in delta) {
fullResponse += delta.text;
if (onChunk) {
onChunk(delta.text);
}
}
stream.on('text', (text) => {
fullResponse += text;
if (onChunk) {
onChunk(text);
}
}
});

await stream.finalMessage();

return fullResponse;
}
Loading
Loading