diff --git a/backend/src/lib/credits.ts b/backend/src/lib/credits.ts new file mode 100644 index 000000000..77994cd74 --- /dev/null +++ b/backend/src/lib/credits.ts @@ -0,0 +1,101 @@ +import { createServerSupabase } from "./supabase"; + +/** + * Monthly cap on user-initiated LLM messages. Surfaced on the user + * profile as `creditsRemaining`. The historical default was 999999 — + * effectively unlimited — and we preserve that here so this module is + * behaviour-neutral unless the operator opts in by setting + * MONTHLY_MESSAGE_CREDIT_LIMIT to a smaller integer. Tier-based limits + * are intentionally out of scope; once a single env-driven cap exists + * it's straightforward to layer tier overrides on top. + * + * Only a plain run of base-10 digits is accepted. Anything else (unset, + * empty, negative, "1,000", "1e3", ...) falls back to 999999 rather than + * being partially parsed into a much smaller cap. + */ +export function monthlyCreditLimit(): number { + const raw = process.env.MONTHLY_MESSAGE_CREDIT_LIMIT?.trim(); + if (!raw || !/^\d+$/.test(raw)) return 999999; + const parsed = Number(raw); + return Number.isSafeInteger(parsed) ? parsed : 999999; +} + +type Db = ReturnType<typeof createServerSupabase>; + +export type CreditState = { + used: number; + limit: number; + remaining: number; +}; + +/** + * Returns the user's current message-credit balance. Caller is + * responsible for translating a non-positive `remaining` into a 402 + * before the LLM call. + * + * Note: read-only. The 30-day window reset still happens in + * routes/user.ts → loadProfile() when the profile is fetched. We do + * not duplicate that here because the streaming-chat code path doesn't + * fetch the full profile, so this check can still see the previous + * window's counter until the next /user/profile fetch applies the + * reset. + * + * A failed or empty read counts as zero usage, so the check fails + * open; a read error reported by the client is logged. 
+ */ +export async function getCreditState( + userId: string, + db: Db, +): Promise<CreditState> { + const limit = monthlyCreditLimit(); + const { data, error } = await db + .from("user_profiles") + .select("message_credits_used") + .eq("user_id", userId) + .maybeSingle(); + if (error) console.error("[credits] failed to read message_credits_used", error); + const used = Number((data as { message_credits_used?: number } | null)?.message_credits_used ?? 0); + const safeUsed = Number.isFinite(used) ? used : 0; + return { used: safeUsed, limit, remaining: Math.max(limit - safeUsed, 0) }; +} + +/** + * Increment the user's message-credit counter by `n` (default 1). + * Called exactly once per successful user-initiated LLM message — not + * per tool turn — so the counter reflects user-visible message volume. + * + * We do a read-then-write because postgrest doesn't expose an atomic + * increment expression. Two near-simultaneous requests can therefore + * under-count by one; that's acceptable for a soft budget. If hard + * accounting is needed later, swap this for an `rpc('inc_credits', ...)` + * stored procedure. + * + * Errors reported by the client are logged, not thrown. When the read + * fails we skip the write: writing just `n` would overwrite the user's + * real count. + */ +export async function incrementMessageCredits( + userId: string, + db: Db, + n = 1, +): Promise<void> { + const { data, error: readError } = await db + .from("user_profiles") + .select("message_credits_used") + .eq("user_id", userId) + .maybeSingle(); + if (readError) { + console.error("[credits] failed to read message_credits_used", readError); + return; + } + const current = Number((data as { message_credits_used?: number } | null)?.message_credits_used ?? 0); + const next = (Number.isFinite(current) ? 
current : 0) + n; + const { error: writeError } = await db + .from("user_profiles") + .update({ + message_credits_used: next, + updated_at: new Date().toISOString(), + }) + .eq("user_id", userId); + if (writeError) console.error("[credits] failed to update message_credits_used", writeError); +} diff --git a/backend/src/routes/chat.ts b/backend/src/routes/chat.ts index 9a39e0a9b..c917e4e87 100644 --- a/backend/src/routes/chat.ts +++ b/backend/src/routes/chat.ts @@ -13,6 +13,7 @@ import { import { completeText } from "../lib/llm"; import { getUserApiKeys, getUserModelSettings } from "../lib/userSettings"; import { checkProjectAccess } from "../lib/access"; +import { getCreditState, incrementMessageCredits } from "../lib/credits"; export const chatRouter = Router(); @@ -511,6 +512,20 @@ chatRouter.post("/", requireAuth, async (req, res) => { devLog("[chat/stream] resolved chatId", chatId); + // Pre-call budget check. We reject before doing any LLM work so a + // user who has run out can't spend tokens; the increment after a + // successful stream is what bumps the counter. Default cap is + // 999999 (set via MONTHLY_MESSAGE_CREDIT_LIMIT) so this is a no-op + // unless the operator configures a smaller limit. + const creditState = await getCreditState(userId, db); + if (creditState.remaining <= 0) { + return void res.status(402).json({ + detail: "Monthly message credit limit reached.", + creditsUsed: creditState.used, + creditsLimit: creditState.limit, + }); + } + const lastUser = [...messages].reverse().find((m) => m.role === "user"); if (lastUser) { await db.from("chat_messages").insert({ @@ -587,6 +602,11 @@ chatRouter.post("/", requireAuth, async (req, res) => { annotations: annotations.length ? annotations : null, }); + // Bump the monthly counter exactly once per successful + // user-initiated message — not per tool turn — so the user- + // visible "credits remaining" reflects message volume. 
+ await incrementMessageCredits(userId, db); + if (!chatTitle && lastUser?.content) { await db .from("chats") diff --git a/backend/src/routes/projectChat.ts b/backend/src/routes/projectChat.ts index 5e2996152..f83142481 100644 --- a/backend/src/routes/projectChat.ts +++ b/backend/src/routes/projectChat.ts @@ -13,6 +13,7 @@ import { } from "../lib/chatTools"; import { getUserApiKeys } from "../lib/userSettings"; import { checkProjectAccess } from "../lib/access"; +import { getCreditState, incrementMessageCredits } from "../lib/credits"; const PROJECT_SYSTEM_PROMPT_EXTRA = `PROJECT CONTEXT: You are operating within a project folder that contains a collection of legal documents the user has organised for a single matter. The user's questions will usually refer to one or more documents in this project — your job is to find the relevant files to work on. Use list_documents to see what is available and fetch_documents / read_document to pull in any documents you need before answering. @@ -50,6 +51,18 @@ projectChatRouter.post("/", requireAuth, async (req, res) => { if (!projectAccess.ok) return void res.status(404).json({ detail: "Project not found" }); + // Pre-call budget check. Default cap is 999999 (set via + // MONTHLY_MESSAGE_CREDIT_LIMIT) so this is a no-op unless the + // operator configures a smaller limit. + const creditState = await getCreditState(userId, db); + if (creditState.remaining <= 0) { + return void res.status(402).json({ + detail: "Monthly message credit limit reached.", + creditsUsed: creditState.used, + creditsLimit: creditState.limit, + }); + } + let chatId = chat_id ?? null; let chatTitle: string | null = null; @@ -179,6 +192,10 @@ projectChatRouter.post("/", requireAuth, async (req, res) => { annotations: annotations.length ? annotations : null, }); + // Bump the monthly counter exactly once per successful + // user-initiated message — not per tool turn. 
+ await incrementMessageCredits(userId, db); + if (!chatTitle && lastUser?.content) { await db .from("chats") diff --git a/backend/src/routes/user.ts b/backend/src/routes/user.ts index 0df2021d6..d42d075c7 100644 --- a/backend/src/routes/user.ts +++ b/backend/src/routes/user.ts @@ -9,11 +9,10 @@ import { normalizeApiKeyProvider, saveUserApiKey, } from "../lib/userApiKeys"; +import { monthlyCreditLimit } from "../lib/credits"; export const userRouter = Router(); -const MONTHLY_CREDIT_LIMIT = 999999; - type UserProfileRow = { display_name: string | null; organisation: string | null; @@ -33,7 +32,7 @@ function serializeProfile( organisation: row.organisation, messageCreditsUsed: creditsUsed, creditsResetDate: row.credits_reset_date, - creditsRemaining: Math.max(MONTHLY_CREDIT_LIMIT - creditsUsed, 0), + creditsRemaining: Math.max(monthlyCreditLimit() - creditsUsed, 0), tier: row.tier || "Free", tabularModel: resolveModel(row.tabular_model, DEFAULT_TABULAR_MODEL), ...(apiKeyStatus ? { apiKeyStatus } : {}),