From 494af968e938a9335efdf81a173c98e424bd0980 Mon Sep 17 00:00:00 2001 From: shawn Date: Tue, 19 May 2026 23:41:35 +0100 Subject: [PATCH] fix: enforce monthly message-credit limit before chat LLM calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit user_profiles.message_credits_used is surfaced on /user/profile as `creditsRemaining`, but on main today (a) no code increments it after an LLM call, so it's always 0, and (b) no code checks it before an LLM call, so the value is informational only. The "credits remaining" shown in the UI is therefore a no-op gauge. Wire the field up: - New backend/src/lib/credits.ts with: * monthlyCreditLimit() — reads MONTHLY_MESSAGE_CREDIT_LIMIT from the env, defaulting to 999999 (the constant previously hard-coded in routes/user.ts). Behaviour-neutral unless an operator opts in. * getCreditState(userId, db) — { used, limit, remaining } for the pre-call check; read-only, doesn't fetch the full profile. * incrementMessageCredits(userId, db, n=1) — bumps the counter; one call per user-initiated message, not per tool turn, so the gauge reflects user-visible message volume. - POST /chat and POST /projects/:projectId/chat now: * Reject with 402 + { creditsUsed, creditsLimit } if remaining <= 0, before flushing response headers (so the client sees a clean error instead of a half-streamed response). * Increment after a successful runLLMStream + assistant-message insert. Failures don't count against the user. - routes/user.ts now imports monthlyCreditLimit() instead of holding its own copy of the constant, so the env-driven limit is the single source of truth. Tabular and workflow LLM call sites are left for a follow-up — the two streaming chat routes are the most user-visible entry points and adding the rest is a wider, more invasive change. 
--- backend/src/lib/credits.ts | 86 +++++++++++++++++++++++++++++++ backend/src/routes/chat.ts | 20 +++++++ backend/src/routes/projectChat.ts | 17 ++++++ backend/src/routes/user.ts | 5 +- 4 files changed, 125 insertions(+), 3 deletions(-) create mode 100644 backend/src/lib/credits.ts diff --git a/backend/src/lib/credits.ts b/backend/src/lib/credits.ts new file mode 100644 index 000000000..77994cd74 --- /dev/null +++ b/backend/src/lib/credits.ts @@ -0,0 +1,86 @@ +import { createServerSupabase } from "./supabase"; + +/** + * Monthly cap on user-initiated LLM messages. Surfaced on the user + * profile as `creditsRemaining`. The historical default was 999999 — + * effectively unlimited — and we preserve that here so this module is + * behaviour-neutral unless the operator opts in by setting + * MONTHLY_MESSAGE_CREDIT_LIMIT to a smaller integer. Tier-based limits + * are intentionally out of scope; once a single env-driven cap exists + * it's straightforward to layer tier overrides on top. + */ +export function monthlyCreditLimit(): number { + const raw = process.env.MONTHLY_MESSAGE_CREDIT_LIMIT; + if (!raw) return 999999; + const parsed = Number.parseInt(raw, 10); + if (!Number.isFinite(parsed) || parsed < 0) return 999999; + return parsed; +} + +type Db = ReturnType<typeof createServerSupabase>; + +export type CreditState = { + used: number; + limit: number; + remaining: number; +}; + +/** + * Returns the user's current message-credit balance. Caller is + * responsible for translating a non-positive `remaining` into a 402 + * before the LLM call. + * + * Note: read-only. The 30-day window reset still happens in + * routes/user.ts → loadProfile() when the profile is fetched. We do + * not duplicate that here because the streaming-chat code path doesn't + * fetch the full profile; a small amount of staleness is acceptable + * for the enforcement check (the counter may still hold the previous + * window's total, so a user at the cap keeps receiving 402s until the + * next /user/profile fetch applies the reset). 
+ */ +export async function getCreditState( + userId: string, + db: Db, +): Promise<CreditState> { + const limit = monthlyCreditLimit(); + const { data } = await db + .from("user_profiles") + .select("message_credits_used") + .eq("user_id", userId) + .maybeSingle(); + const used = Number((data as { message_credits_used?: number } | null)?.message_credits_used ?? 0); + const safeUsed = Number.isFinite(used) ? used : 0; + return { used: safeUsed, limit, remaining: Math.max(limit - safeUsed, 0) }; +} + +/** + * Increment the user's message-credit counter by `n` (default 1). + * Called exactly once per successful user-initiated LLM message — not + * per tool turn — so the counter reflects user-visible message volume. + * + * We do a read-then-write because postgrest doesn't expose an atomic + * increment expression. Two near-simultaneous requests can therefore + * under-count by one; that's acceptable for a soft budget. If hard + * accounting is needed later, swap this for an `rpc('inc_credits', ...)` + * stored procedure. + */ +export async function incrementMessageCredits( + userId: string, + db: Db, + n = 1, +): Promise<void> { + const { data } = await db + .from("user_profiles") + .select("message_credits_used") + .eq("user_id", userId) + .maybeSingle(); + const current = Number((data as { message_credits_used?: number } | null)?.message_credits_used ?? 0); + const next = (Number.isFinite(current) ? 
current : 0) + n; + await db + .from("user_profiles") + .update({ + message_credits_used: next, + updated_at: new Date().toISOString(), + }) + .eq("user_id", userId); +} diff --git a/backend/src/routes/chat.ts b/backend/src/routes/chat.ts index 9a39e0a9b..c917e4e87 100644 --- a/backend/src/routes/chat.ts +++ b/backend/src/routes/chat.ts @@ -13,6 +13,7 @@ import { import { completeText } from "../lib/llm"; import { getUserApiKeys, getUserModelSettings } from "../lib/userSettings"; import { checkProjectAccess } from "../lib/access"; +import { getCreditState, incrementMessageCredits } from "../lib/credits"; export const chatRouter = Router(); @@ -511,6 +512,20 @@ chatRouter.post("/", requireAuth, async (req, res) => { devLog("[chat/stream] resolved chatId", chatId); + // Pre-call budget check. We reject before doing any LLM work so a + // user who has run out can't spend tokens; the increment after a + // successful stream is what bumps the counter. Default cap is + // 999999 (set via MONTHLY_MESSAGE_CREDIT_LIMIT) so this is a no-op + // unless the operator configures a smaller limit. + const creditState = await getCreditState(userId, db); + if (creditState.remaining <= 0) { + return void res.status(402).json({ + detail: "Monthly message credit limit reached.", + creditsUsed: creditState.used, + creditsLimit: creditState.limit, + }); + } + const lastUser = [...messages].reverse().find((m) => m.role === "user"); if (lastUser) { await db.from("chat_messages").insert({ @@ -587,6 +602,11 @@ chatRouter.post("/", requireAuth, async (req, res) => { annotations: annotations.length ? annotations : null, }); + // Bump the monthly counter exactly once per successful + // user-initiated message — not per tool turn — so the user- + // visible "credits remaining" reflects message volume. 
+ await incrementMessageCredits(userId, db); + if (!chatTitle && lastUser?.content) { await db .from("chats") diff --git a/backend/src/routes/projectChat.ts b/backend/src/routes/projectChat.ts index 5e2996152..f83142481 100644 --- a/backend/src/routes/projectChat.ts +++ b/backend/src/routes/projectChat.ts @@ -13,6 +13,7 @@ import { } from "../lib/chatTools"; import { getUserApiKeys } from "../lib/userSettings"; import { checkProjectAccess } from "../lib/access"; +import { getCreditState, incrementMessageCredits } from "../lib/credits"; const PROJECT_SYSTEM_PROMPT_EXTRA = `PROJECT CONTEXT: You are operating within a project folder that contains a collection of legal documents the user has organised for a single matter. The user's questions will usually refer to one or more documents in this project — your job is to find the relevant files to work on. Use list_documents to see what is available and fetch_documents / read_document to pull in any documents you need before answering. @@ -50,6 +51,18 @@ projectChatRouter.post("/", requireAuth, async (req, res) => { if (!projectAccess.ok) return void res.status(404).json({ detail: "Project not found" }); + // Pre-call budget check. Default cap is 999999 (set via + // MONTHLY_MESSAGE_CREDIT_LIMIT) so this is a no-op unless the + // operator configures a smaller limit. + const creditState = await getCreditState(userId, db); + if (creditState.remaining <= 0) { + return void res.status(402).json({ + detail: "Monthly message credit limit reached.", + creditsUsed: creditState.used, + creditsLimit: creditState.limit, + }); + } + let chatId = chat_id ?? null; let chatTitle: string | null = null; @@ -179,6 +192,10 @@ projectChatRouter.post("/", requireAuth, async (req, res) => { annotations: annotations.length ? annotations : null, }); + // Bump the monthly counter exactly once per successful + // user-initiated message — not per tool turn. 
+ await incrementMessageCredits(userId, db); + if (!chatTitle && lastUser?.content) { await db .from("chats") diff --git a/backend/src/routes/user.ts b/backend/src/routes/user.ts index 0df2021d6..d42d075c7 100644 --- a/backend/src/routes/user.ts +++ b/backend/src/routes/user.ts @@ -9,11 +9,10 @@ import { normalizeApiKeyProvider, saveUserApiKey, } from "../lib/userApiKeys"; +import { monthlyCreditLimit } from "../lib/credits"; export const userRouter = Router(); -const MONTHLY_CREDIT_LIMIT = 999999; - type UserProfileRow = { display_name: string | null; organisation: string | null; @@ -33,7 +32,7 @@ function serializeProfile( organisation: row.organisation, messageCreditsUsed: creditsUsed, creditsResetDate: row.credits_reset_date, - creditsRemaining: Math.max(MONTHLY_CREDIT_LIMIT - creditsUsed, 0), + creditsRemaining: Math.max(monthlyCreditLimit() - creditsUsed, 0), tier: row.tier || "Free", tabularModel: resolveModel(row.tabular_model, DEFAULT_TABULAR_MODEL), ...(apiKeyStatus ? { apiKeyStatus } : {}),