From 0ff6ff139a6b252d471b5f6fe7ca2e786c248099 Mon Sep 17 00:00:00 2001 From: Dirk Date: Fri, 19 Jun 2026 09:06:55 +0200 Subject: [PATCH 1/3] feat(api): add opt-in per-token auth identity cache Every REST request through AuthGuard re-runs the full identity pipeline (RS256 verify + 3-4 Mango user lookups + a lastLogin write + accessMap projection). The socket handshake amortizes this per connection; the REST path does not, so a busy authenticated client hammers resolveIdentity and the DB. Add IdentityCacheService, a transport-agnostic cache in front of AuthIdentityService.resolveOrDefault, used by both the REST guard and the socket handshake. It caches the resolved membership + identity keyed by a sha256 of the token and re-derives the accessMap live from PermissionSystem on each hit, so an ACL (groupUpdate) change needs no invalidation. Design notes: - TTL = min(token exp, IDENTITY_CACHE_TTL_MS): a hit never serves a token past expiry; a different/expired token misses and is fully verified. - Bounded by a maxEntries cap + lazy/sweep eviction, no timers (BoundedTtlCache, reusing the strikeLimiter pattern). - Invalidate on User memberOf removal (PermissionChange DeleteCmd), AuthProvider/AutoGroupMappings changes, and DB disconnect. Not on groupUpdate (accessMap is re-projected) or plain User updates, so lastLogin writes never evict. - lastLogin now updates once per TTL per instance instead of per request. - Ships disabled (IDENTITY_CACHE_ENABLED=false); enable per environment. - Keeps all auth resolution behind one seam, easing a future SSE transport. 
Co-Authored-By: Claude Opus 4.8 --- api/.env.example | 9 + api/src/app.module.ts | 2 + api/src/auth/auth.guard.spec.ts | 9 +- api/src/auth/auth.guard.ts | 9 +- api/src/auth/authIdentity.service.spec.ts | 11 +- api/src/auth/boundedTtlCache.spec.ts | 86 +++++++++ api/src/auth/boundedTtlCache.ts | 72 ++++++++ api/src/auth/identityCache.service.spec.ts | 193 +++++++++++++++++++++ api/src/auth/identityCache.service.ts | 163 +++++++++++++++++ api/src/configuration.ts | 27 +++ api/src/socketio.ts | 6 +- api/src/test/testingModule.ts | 4 + 12 files changed, 581 insertions(+), 10 deletions(-) create mode 100644 api/src/auth/boundedTtlCache.spec.ts create mode 100644 api/src/auth/boundedTtlCache.ts create mode 100644 api/src/auth/identityCache.service.spec.ts create mode 100644 api/src/auth/identityCache.service.ts diff --git a/api/.env.example b/api/.env.example index caf55523cc..96f76fdc43 100644 --- a/api/.env.example +++ b/api/.env.example @@ -41,6 +41,15 @@ QUERY_RATE_LIMIT_STRIKE_DECAY_MS=600000 # traffic shares one bucket (the proxy's IP). TRUST_PROXY=false +# In-memory per-token auth identity cache. Default OFF — enable per environment to skip +# the full identity resolve (JWT verify + user lookups + lastLogin write) on repeat +# requests with the same token. The effective TTL is min(IDENTITY_CACHE_TTL_MS, the +# token's own exp), so a cached identity is never served past expiry. MAX_ENTRIES is an +# OOM guardrail (size to peak concurrent active tokens per API instance). +IDENTITY_CACHE_ENABLED=false +IDENTITY_CACHE_TTL_MS=300000 +IDENTITY_CACHE_MAX_ENTRIES=50000 + # Mapping between Auth0 Javascript Web Token data and group and user assignments. All mappings are optional. 
JWT_MAPPINGS='{ "groups": { diff --git a/api/src/app.module.ts b/api/src/app.module.ts index caadaa8f09..7850a53abe 100644 --- a/api/src/app.module.ts +++ b/api/src/app.module.ts @@ -18,6 +18,7 @@ import { FtsSearchService } from "./endpoints/ftsSearch.service"; import { FtsSearchController } from "./endpoints/ftsSearch.controller"; import { StorageStatusController } from "./endpoints/storageStatus.controller"; import { AuthIdentityService } from "./auth/authIdentity.service"; +import { IdentityCacheService } from "./auth/identityCache.service"; import { QueryRateLimiterService } from "./ratelimit/queryRateLimiter.service"; let winstonTransport: winston.transport; @@ -71,6 +72,7 @@ if (!process.env.NODE_ENV || process.env.NODE_ENV === "development") { FtsSearchService, ChangeRequestService, AuthIdentityService, + IdentityCacheService, ], }) export class AppModule {} diff --git a/api/src/auth/auth.guard.spec.ts b/api/src/auth/auth.guard.spec.ts index 20a1d885bf..5373fe114f 100644 --- a/api/src/auth/auth.guard.spec.ts +++ b/api/src/auth/auth.guard.spec.ts @@ -1,10 +1,13 @@ import { AuthGuard } from "./auth.guard"; -import { AuthIdentityService } from "./authIdentity.service"; +import { IdentityCacheService } from "./identityCache.service"; import type { IdentityResult } from "./authIdentity.service"; describe("AuthGuard", () => { let guard: AuthGuard; - let authIdentityService: Partial; + // The guard now resolves through IdentityCacheService (a passthrough to + // AuthIdentityService.resolveOrDefault when the cache is disabled); the mock + // exposes the same resolveOrDefault contract. 
+ let authIdentityService: Partial; const defaultUserDetails = { groups: ["group-public-users"], @@ -15,7 +18,7 @@ describe("AuthGuard", () => { authIdentityService = { resolveOrDefault: jest.fn(), }; - guard = new AuthGuard(authIdentityService as AuthIdentityService); + guard = new AuthGuard(authIdentityService as IdentityCacheService); }); function createMockContext(authHeader?: string, providerId?: string) { diff --git a/api/src/auth/auth.guard.ts b/api/src/auth/auth.guard.ts index 73d401ed18..ee280500e0 100644 --- a/api/src/auth/auth.guard.ts +++ b/api/src/auth/auth.guard.ts @@ -1,6 +1,7 @@ import { CanActivate, ExecutionContext, Injectable } from "@nestjs/common"; import { FastifyRequest } from "fastify"; -import { AuthIdentityService, JwtUserDetails } from "./authIdentity.service"; +import { JwtUserDetails } from "./authIdentity.service"; +import { IdentityCacheService } from "./identityCache.service"; declare module "fastify" { interface FastifyRequest { @@ -10,14 +11,16 @@ declare module "fastify" { @Injectable() export class AuthGuard implements CanActivate { - constructor(private authIdentityService: AuthIdentityService) {} + // Resolve through the identity cache (transport-agnostic; a pure passthrough to + // AuthIdentityService.resolveOrDefault when the cache is disabled). 
+ constructor(private identityCacheService: IdentityCacheService) {} async canActivate(context: ExecutionContext): Promise { const request = context.switchToHttp().getRequest(); const token = this.extractTokenFromHeader(request); const providerId = request.headers["x-auth-provider-id"] as string; - const result = await this.authIdentityService.resolveOrDefault(token, providerId); + const result = await this.identityCacheService.resolveOrDefault(token, providerId); request.user = result.userDetails; return true; } diff --git a/api/src/auth/authIdentity.service.spec.ts b/api/src/auth/authIdentity.service.spec.ts index 5d2de9a4e6..ff0935fd2e 100644 --- a/api/src/auth/authIdentity.service.spec.ts +++ b/api/src/auth/authIdentity.service.spec.ts @@ -465,6 +465,7 @@ describe("AuthIdentityService", () => { // ── AuthGuard integration ──────────────────────────────────────────────────── import { AuthGuard } from "./auth.guard"; +import { IdentityCacheService } from "./identityCache.service"; jest.mock("jwks-rsa", () => { return Object.assign( @@ -500,7 +501,15 @@ describe("AuthGuard (Integrated)", () => { }; authIdentityService = new AuthIdentityService(mockJwtService, mockDbService); - guard = new AuthGuard(authIdentityService); + // The guard resolves through IdentityCacheService; with the cache disabled + // (config returns no "identityCache") it's a passthrough to the real service, + // so this still exercises the full guard → AuthIdentityService path. 
+ const identityCacheService = new IdentityCacheService( + authIdentityService, + { get: () => undefined } as any, + { on: jest.fn() } as any, + ); + guard = new AuthGuard(identityCacheService); }); it("should fall back to default groups when no email in token and no user found by identity", async () => { diff --git a/api/src/auth/boundedTtlCache.spec.ts b/api/src/auth/boundedTtlCache.spec.ts new file mode 100644 index 0000000000..3880ef801f --- /dev/null +++ b/api/src/auth/boundedTtlCache.spec.ts @@ -0,0 +1,86 @@ +import { BoundedTtlCache, BoundedTtlCacheOptions } from "./boundedTtlCache"; + +describe("BoundedTtlCache", () => { + let nowMs: number; + const now = () => nowMs; + + const make = (over: Partial = {}) => + new BoundedTtlCache({ now, ...over }); + + beforeEach(() => { + nowMs = 0; + }); + + it("returns undefined for an unknown key", () => { + const c = make(); + expect(c.get("nope")).toBeUndefined(); + }); + + it("returns a stored value within its TTL", () => { + const c = make(); + c.set("k", "v", 1000); + expect(c.get("k")).toBe("v"); + }); + + it("expires a value once its TTL elapses, and drops it lazily on get", () => { + const c = make(); + c.set("k", "v", 1000); + nowMs += 1000; // expiresAt (0 + 1000) <= now → expired + expect(c.get("k")).toBeUndefined(); + expect(c.size).toBe(0); + }); + + it("treats a non-positive ttl as a no-op (does not cache)", () => { + const c = make(); + c.set("k", "v", 0); + c.set("k2", "v", -5); + expect(c.get("k")).toBeUndefined(); + expect(c.size).toBe(0); + }); + + it("overwrites an existing key without growing size", () => { + const c = make(); + c.set("k", "v1", 1000); + c.set("k", "v2", 1000); + expect(c.get("k")).toBe("v2"); + expect(c.size).toBe(1); + }); + + it("sweeps expired entries when maxEntries is reached on insert", () => { + const c = make({ maxEntries: 2 }); + c.set("a", "v", 1000); + c.set("b", "v", 1000); + expect(c.size).toBe(2); + nowMs += 1000; // a and b now expired + c.set("c", "v", 1000); // 
size >= maxEntries → sweep first + expect(c.size).toBe(1); + expect(c.get("c")).toBe("v"); + expect(c.get("a")).toBeUndefined(); + }); + + it("clear() drops everything", () => { + const c = make(); + c.set("a", "v", 1000); + c.set("b", "v", 1000); + c.clear(); + expect(c.size).toBe(0); + }); + + it("delete() removes a single key", () => { + const c = make(); + c.set("a", "v", 1000); + c.set("b", "v", 1000); + c.delete("a"); + expect(c.get("a")).toBeUndefined(); + expect(c.get("b")).toBe("v"); + }); + + it("keeps keys independent", () => { + const c = make(); + c.set("a", "va", 1000); + c.set("b", "vb", 5000); + nowMs += 1000; // a expired, b still live + expect(c.get("a")).toBeUndefined(); + expect(c.get("b")).toBe("vb"); + }); +}); diff --git a/api/src/auth/boundedTtlCache.ts b/api/src/auth/boundedTtlCache.ts new file mode 100644 index 0000000000..628fa31174 --- /dev/null +++ b/api/src/auth/boundedTtlCache.ts @@ -0,0 +1,72 @@ +export type BoundedTtlCacheOptions = { + /** Soft cap on tracked entries; an expired-entry sweep runs before an insert once it is reached. Default 50_000. */ + maxEntries?: number; + /** Injectable clock (ms). Defaults to Date.now. */ + now?: () => number; +}; + +type Entry = { + value: V; + expiresAt: number; +}; + +/** + * Minimal in-memory key→value cache with per-entry TTL and a bounded entry count. + * + * Same shape as {@link ../ratelimit/strikeLimiter.StrikeLimiter}: no background + * timers (which would show up as open handles in tests / shutdown). Entries expire + * lazily — an expired entry is dropped on the `get()` that next touches it — and a + * `sweep()` of all expired entries runs before an insert once `maxEntries` is reached. + * + * `maxEntries` is a soft OOM guardrail, not a working-set limiter: the sweep only + * removes already-expired entries, so the map can briefly exceed the cap if every + * entry is still live. TTL is what bounds the steady-state size. 
+ */ +export class BoundedTtlCache { + private readonly maxEntries: number; + private readonly now: () => number; + private readonly entries = new Map>(); + + constructor(options: BoundedTtlCacheOptions = {}) { + this.maxEntries = options.maxEntries ?? 50_000; + this.now = options.now ?? Date.now; + } + + /** Return the cached value, or undefined if missing or expired (dropping it). */ + get(key: string): V | undefined { + const entry = this.entries.get(key); + if (!entry) return undefined; + if (entry.expiresAt <= this.now()) { + this.entries.delete(key); // lazy eviction + return undefined; + } + return entry.value; + } + + /** Cache `value` under `key` for `ttlMs`. A non-positive ttl is a no-op. */ + set(key: string, value: V, ttlMs: number): void { + if (ttlMs <= 0) return; + if (!this.entries.has(key) && this.entries.size >= this.maxEntries) this.sweep(); + this.entries.set(key, { value, expiresAt: this.now() + ttlMs }); + } + + delete(key: string): void { + this.entries.delete(key); + } + + clear(): void { + this.entries.clear(); + } + + /** Drop every entry whose TTL has elapsed. 
*/ + sweep(): void { + const now = this.now(); + for (const [key, entry] of this.entries) { + if (entry.expiresAt <= now) this.entries.delete(key); + } + } + + get size(): number { + return this.entries.size; + } +} diff --git a/api/src/auth/identityCache.service.spec.ts b/api/src/auth/identityCache.service.spec.ts new file mode 100644 index 0000000000..0fde83b8a6 --- /dev/null +++ b/api/src/auth/identityCache.service.spec.ts @@ -0,0 +1,193 @@ +import { EventEmitter } from "events"; +import { ConfigService } from "@nestjs/config"; +import { IdentityCacheService } from "./identityCache.service"; +import { AuthIdentityService, IdentityResult } from "./authIdentity.service"; +import { PermissionSystem } from "../permissions/permissions.service"; +import { DbService } from "../db/db.service"; +import { DeleteReason, DocType } from "../enums"; + +const CONFIG_KEY = "identityCache"; + +function authResult(over: Partial = {}): IdentityResult { + return { + status: "authenticated", + userDetails: { + groups: ["g1", "g2"], + userId: "user-1", + email: "a@b.com", + name: "A", + // far-future expiry (seconds) so the entry is cacheable + jwtPayload: { exp: Math.floor(Date.now() / 1000) + 3600 }, + accessMap: new Map() as any, + ...over, + }, + }; +} + +function makeService(cfg: any) { + const authIdentity = { + resolveOrDefault: jest.fn().mockResolvedValue(authResult()), + } as unknown as AuthIdentityService & { resolveOrDefault: jest.Mock }; + + const configService = { + get: (key: string) => (key === CONFIG_KEY ? cfg : undefined), + } as unknown as ConfigService; + + // A real EventEmitter so we can drive invalidation via emit(). 
+ const db = new EventEmitter() as unknown as DbService; + + const svc = new IdentityCacheService(authIdentity, configService, db); + svc.onModuleInit(); + return { svc, authIdentity, db: db as unknown as EventEmitter }; +} + +const enabledCfg = { enabled: true, ttlMs: 300000, maxEntries: 1000 }; + +describe("IdentityCacheService", () => { + let getAccessMapSpy: jest.SpyInstance; + + beforeEach(() => { + getAccessMapSpy = jest + .spyOn(PermissionSystem, "getAccessMap") + .mockReturnValue(new Map() as any); + }); + + afterEach(() => { + jest.restoreAllMocks(); + }); + + describe("passthrough", () => { + it("does not cache when disabled (calls resolveOrDefault every time)", async () => { + const { svc, authIdentity } = makeService({ enabled: false }); + await svc.resolveOrDefault("t", "p"); + await svc.resolveOrDefault("t", "p"); + expect(authIdentity.resolveOrDefault).toHaveBeenCalledTimes(2); + }); + + it("passes through the anonymous path (no token) without caching", async () => { + const { svc, authIdentity } = makeService(enabledCfg); + await svc.resolveOrDefault(undefined, undefined); + await svc.resolveOrDefault(undefined, undefined); + expect(authIdentity.resolveOrDefault).toHaveBeenCalledTimes(2); + expect(authIdentity.resolveOrDefault).toHaveBeenCalledWith(undefined, undefined); + }); + + it("passes through when a token is given without a providerId", async () => { + const { svc, authIdentity } = makeService(enabledCfg); + await svc.resolveOrDefault("t", undefined); + await svc.resolveOrDefault("t", undefined); + expect(authIdentity.resolveOrDefault).toHaveBeenCalledTimes(2); + }); + }); + + describe("hit / miss", () => { + it("caches on miss and serves the second request from cache", async () => { + const { svc, authIdentity } = makeService(enabledCfg); + await svc.resolveOrDefault("t", "p"); + await svc.resolveOrDefault("t", "p"); + expect(authIdentity.resolveOrDefault).toHaveBeenCalledTimes(1); + }); + + it("re-derives the accessMap live on a hit from 
the cached groups", async () => { + const { svc } = makeService(enabledCfg); + await svc.resolveOrDefault("t", "p"); // miss → cache + getAccessMapSpy.mockClear(); + + const sentinel = new Map([["live", true]]) as any; + getAccessMapSpy.mockReturnValue(sentinel); + + const res = await svc.resolveOrDefault("t", "p"); // hit + expect(getAccessMapSpy).toHaveBeenCalledWith(["g1", "g2"]); + expect(res.userDetails.accessMap).toBe(sentinel); + expect(res.userDetails.groups).toEqual(["g1", "g2"]); + expect(res.userDetails.userId).toBe("user-1"); + }); + + it("keys distinct tokens independently", async () => { + const { svc, authIdentity } = makeService(enabledCfg); + await svc.resolveOrDefault("t1", "p"); + await svc.resolveOrDefault("t2", "p"); + expect(authIdentity.resolveOrDefault).toHaveBeenCalledTimes(2); + }); + + it("does not cache an already-expired token (ttl <= 0)", async () => { + const { svc, authIdentity } = makeService(enabledCfg); + authIdentity.resolveOrDefault.mockResolvedValue( + authResult({ jwtPayload: { exp: Math.floor(Date.now() / 1000) - 10 } }), + ); + await svc.resolveOrDefault("t", "p"); + await svc.resolveOrDefault("t", "p"); + expect(authIdentity.resolveOrDefault).toHaveBeenCalledTimes(2); + }); + + it("does not cache thrown auth failures and propagates them", async () => { + const { svc, authIdentity } = makeService(enabledCfg); + authIdentity.resolveOrDefault.mockRejectedValue(new Error("boom")); + await expect(svc.resolveOrDefault("t", "p")).rejects.toThrow("boom"); + await expect(svc.resolveOrDefault("t", "p")).rejects.toThrow("boom"); + expect(authIdentity.resolveOrDefault).toHaveBeenCalledTimes(2); + }); + }); + + describe("invalidation", () => { + const seed = async (svc: IdentityCacheService) => { + await svc.resolveOrDefault("t", "p"); // miss → cache + }; + const isHit = async ( + svc: IdentityCacheService, + authIdentity: { resolveOrDefault: jest.Mock }, + ) => { + authIdentity.resolveOrDefault.mockClear(); + await 
svc.resolveOrDefault("t", "p"); + return authIdentity.resolveOrDefault.mock.calls.length === 0; + }; + + it("clears on a User PermissionChange DeleteCmd (revocation)", async () => { + const { svc, authIdentity, db } = makeService(enabledCfg); + await seed(svc); + db.emit("update", { + type: DocType.DeleteCmd, + deleteReason: DeleteReason.PermissionChange, + docType: DocType.User, + docId: "user-1", + }); + expect(await isHit(svc, authIdentity)).toBe(false); + }); + + it("clears on an AuthProvider update", async () => { + const { svc, authIdentity, db } = makeService(enabledCfg); + await seed(svc); + db.emit("update", { type: DocType.AuthProvider, _id: "ap1" }); + expect(await isHit(svc, authIdentity)).toBe(false); + }); + + it("clears on an AutoGroupMappings update", async () => { + const { svc, authIdentity, db } = makeService(enabledCfg); + await seed(svc); + db.emit("update", { type: DocType.AutoGroupMappings, _id: "m1" }); + expect(await isHit(svc, authIdentity)).toBe(false); + }); + + it("clears on disconnect", async () => { + const { svc, authIdentity, db } = makeService(enabledCfg); + await seed(svc); + db.emit("disconnect"); + expect(await isHit(svc, authIdentity)).toBe(false); + }); + + it("does NOT clear on a plain User update (e.g. lastLogin write)", async () => { + const { svc, authIdentity, db } = makeService(enabledCfg); + await seed(svc); + db.emit("update", { type: DocType.User, _id: "user-1", lastLogin: 123 }); + expect(await isHit(svc, authIdentity)).toBe(true); + }); + + it("does NOT clear on a groupUpdate-style Group update (accessMap is re-derived live)", async () => { + const { svc, authIdentity, db } = makeService(enabledCfg); + await seed(svc); + // Group docs come through `update` too; the service must ignore them for Layer I. 
+ db.emit("update", { type: DocType.Group, _id: "g1" }); + expect(await isHit(svc, authIdentity)).toBe(true); + }); + }); +}); diff --git a/api/src/auth/identityCache.service.ts b/api/src/auth/identityCache.service.ts new file mode 100644 index 0000000000..7935c9b9e8 --- /dev/null +++ b/api/src/auth/identityCache.service.ts @@ -0,0 +1,163 @@ +import { Injectable, OnModuleInit } from "@nestjs/common"; +import { ConfigService } from "@nestjs/config"; +import { createHash } from "crypto"; +import { DbService } from "../db/db.service"; +import { DeleteReason, DocType, Uuid } from "../enums"; +import { PermissionSystem } from "../permissions/permissions.service"; +import { AuthIdentityService, IdentityResult } from "./authIdentity.service"; +import { IdentityCacheConfig } from "../configuration"; +import { BoundedTtlCache } from "./boundedTtlCache"; + +/** + * What we cache per token: the resolved membership + identity, WITHOUT the accessMap. + * The accessMap is re-derived live from {@link PermissionSystem} on every hit so ACL + * (`groupUpdate`) changes are picked up for free without invalidating this cache. + */ +type CachedIdentity = { + status: "authenticated"; + groups: Array; + userId?: Uuid; + email?: string; + name?: string; +}; + +/** + * Transport-agnostic cache in front of {@link AuthIdentityService.resolveOrDefault}. + * + * The full resolve (RS256 verify + 3–4 Mango user lookups + a `lastLogin` write) runs + * once per token per TTL window; subsequent requests with the same token re-derive only + * the (cheap) accessMap projection. Both the REST `AuthGuard` and the socket handshake + * call this; a future SSE handler would call the same method, so SSE reconnect churn + * becomes cache hits instead of re-resolves. + * + * Ships OFF (`IDENTITY_CACHE_ENABLED`). When disabled it is a pure passthrough. 
+ */ +@Injectable() +export class IdentityCacheService implements OnModuleInit { + private readonly enabled: boolean; + private readonly maxAgeMs: number; + private readonly cache?: BoundedTtlCache; + + constructor( + private readonly authIdentityService: AuthIdentityService, + private readonly configService: ConfigService, + private readonly db: DbService, + ) { + const cfg = this.configService.get("identityCache"); + this.enabled = !!cfg?.enabled; + this.maxAgeMs = cfg?.ttlMs ?? 300000; + if (this.enabled) { + this.cache = new BoundedTtlCache({ maxEntries: cfg.maxEntries }); + } + } + + onModuleInit() { + if (!this.enabled || !this.cache) return; + + // Invalidate on the rare events that change resolved membership or token-verification + // rules. NOT `groupUpdate`: we don't cache the accessMap (it's re-derived live), so an + // ACL edit needs no flush here. A plain User `update` (e.g. the `lastLogin` write) is + // intentionally ignored — only a memberOf REMOVAL emits the PermissionChange DeleteCmd + // below, so `lastLogin` churn never evicts the cache. + this.db.on("update", (doc: any) => { + if (!doc?.type) return; + + // Membership revoked: a User doc lost groups → its cached (larger) access is stale. + if ( + doc.type === DocType.DeleteCmd && + doc.deleteReason === DeleteReason.PermissionChange && + doc.docType === DocType.User + ) { + this.cache.clear(); + return; + } + + // Default/dynamic group derivation or token-verification rules changed. + if (doc.type === DocType.AuthProvider || doc.type === DocType.AutoGroupMappings) { + this.cache.clear(); + return; + } + if ( + doc.type === DocType.DeleteCmd && + doc.deleteReason === DeleteReason.Deleted && + (doc.docType === DocType.AuthProvider || doc.docType === DocType.AutoGroupMappings) + ) { + this.cache.clear(); + } + }); + + // Change feed down → we can't trust freshness; drop everything (same rule as the other + // DTO-derived caches: AuthIdentityService, QueryService.languages, S3Service). 
+ this.db.on("disconnect", () => { + this.cache.clear(); + }); + } + + /** + * Cache-fronted equivalent of {@link AuthIdentityService.resolveOrDefault}. Identical + * signature/return type so callers are transport-agnostic. Anonymous (no token), a token without a + * providerId, and the disabled state pass straight through. On a hit, userDetails carries no jwtPayload. + */ + async resolveOrDefault(token?: string, providerId?: string): Promise { + if (!this.enabled || !this.cache || !token || !providerId) { + return this.authIdentityService.resolveOrDefault(token, providerId); + } + + const key = this.hashToken(`${providerId}:${token}`); + + const hit = this.cache.get(key); + if (hit) { + // Re-derive the accessMap live so ACL-graph changes are always reflected. + const accessMap = PermissionSystem.getAccessMap(hit.groups); + return { + status: hit.status, + userDetails: { + groups: hit.groups, + userId: hit.userId, + email: hit.email, + name: hit.name, + accessMap, + }, + }; + } + + // Miss: run the full resolve. Auth failures throw here and are NOT cached. + const result = await this.authIdentityService.resolveOrDefault(token, providerId); + + // Only cache authenticated identities, and never past the token's own expiry. + if (result.status === "authenticated") { + const ttlMs = this.computeTtlMs(result.userDetails.jwtPayload?.exp); + if (ttlMs > 0) { + this.cache.set( + key, + { + status: "authenticated", + groups: result.userDetails.groups, + userId: result.userDetails.userId, + email: result.userDetails.email, + name: result.userDetails.name, + }, + ttlMs, + ); + } + } + + return result; + } + + /** + * TTL never exceeds the token's `exp` (a hit must never serve an expired token); it is + * further capped by the configured max age. `exp` is in seconds (JWT). Falls back to the + * max age when the token carries no `exp`. 
+ */ + private computeTtlMs(expSeconds?: number): number { + if (typeof expSeconds !== "number") return this.maxAgeMs; + const untilExpiry = expSeconds * 1000 - Date.now(); + return Math.min(untilExpiry, this.maxAgeMs); + } + + /** SHA-256 of the token so raw JWTs are never held in memory as keys. */ + private hashToken(value: string): string { + return createHash("sha256").update(value).digest("hex"); + } +} diff --git a/api/src/configuration.ts b/api/src/configuration.ts index a430d0ceb6..33cb11be24 100644 --- a/api/src/configuration.ts +++ b/api/src/configuration.ts @@ -54,6 +54,27 @@ export type QueryConfig = { rateLimit: QueryRateLimitConfig; }; +export type IdentityCacheConfig = { + /** + * Master switch for the in-memory per-token auth identity cache. Ships OFF — enable + * per environment once you want to relieve the /query (and other authenticated) paths + * from re-running the full identity resolve on every request. + * Environment variable: IDENTITY_CACHE_ENABLED (default false). + */ + enabled: boolean; + /** + * Max age (ms) of a cached identity. The effective TTL is min(this, token's own exp), + * so a hit never serves a token past expiry. IDENTITY_CACHE_TTL_MS (default 300000 = 5min). + */ + ttlMs: number; + /** + * Soft cap on cached identities (OOM guardrail; lazy/sweep eviction, TTL bounds normal + * volume). Size to peak concurrent active tokens per instance, with headroom. + * IDENTITY_CACHE_MAX_ENTRIES (default 50000). + */ + maxEntries: number; +}; + export type ValidationConfig = { /** * When set to true, query template validation will log warnings instead of throwing exceptions. 
@@ -104,6 +125,7 @@ export type Configuration = { imageProcessing?: ImageProcessingConfig; socketIo?: SocketIoConfig; validation?: ValidationConfig; + identityCache?: IdentityCacheConfig; }; export default () => @@ -151,4 +173,9 @@ export default () => validation: { bypassTemplateValidation: process.env.BYPASS_TEMPLATE_VALIDATION === "true", } as ValidationConfig, + identityCache: { + enabled: process.env.IDENTITY_CACHE_ENABLED === "true", + ttlMs: parseInt(process.env.IDENTITY_CACHE_TTL_MS, 10) || 300000, + maxEntries: parseInt(process.env.IDENTITY_CACHE_MAX_ENTRIES, 10) || 50000, + } as IdentityCacheConfig, }) as Configuration; diff --git a/api/src/socketio.ts b/api/src/socketio.ts index ed97b4daf0..be421d6980 100644 --- a/api/src/socketio.ts +++ b/api/src/socketio.ts @@ -18,7 +18,7 @@ import { JwtUserDetails } from "./auth/authIdentity.service"; import { S3Service } from "./s3/s3.service"; import { WINSTON_MODULE_PROVIDER } from "nest-winston"; import { Logger } from "winston"; -import { AuthIdentityService } from "./auth/authIdentity.service"; +import { IdentityCacheService } from "./auth/identityCache.service"; /** * Data request from client type definition @@ -104,7 +104,7 @@ export class Socketio implements OnGatewayInit { private readonly logger: Logger, private db: DbService, private s3: S3Service, - private authIdentityService: AuthIdentityService, + private identityCacheService: IdentityCacheService, ) {} afterInit(server: Server) { @@ -128,7 +128,7 @@ export class Socketio implements OnGatewayInit { } try { - const authIdentity = await this.authIdentityService.resolveOrDefault( + const authIdentity = await this.identityCacheService.resolveOrDefault( token, providerId, ); diff --git a/api/src/test/testingModule.ts b/api/src/test/testingModule.ts index efb1cef260..772a158eae 100644 --- a/api/src/test/testingModule.ts +++ b/api/src/test/testingModule.ts @@ -11,6 +11,7 @@ import { WinstonModule } from "nest-winston"; import * as winston from "winston"; 
import { S3Service } from "../s3/s3.service"; import { AuthIdentityService } from "../auth/authIdentity.service"; +import { IdentityCacheService } from "../auth/identityCache.service"; export type testingModuleOptions = { dbName?: string; @@ -64,6 +65,9 @@ export async function createTestingModule(testName: string) { }), }, }, + // Real service; config mock returns undefined for "identityCache" so it stays + // disabled (pure passthrough to the mocked AuthIdentityService above). + IdentityCacheService, { provide: ConfigService, useValue: { From ac6ff7683c4915c9c66f63a3620da3a854f056dc Mon Sep 17 00:00:00 2001 From: Dirk Date: Fri, 19 Jun 2026 10:30:32 +0200 Subject: [PATCH 2/3] fix(api): harden per-token auth identity cache invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the opt-in identity cache, addressing review findings: - Group additions no longer lag: DbService.upsertDoc emits a write-side `permissionChange` event on any non-Group memberOf change (add or remove); IdentityCacheService clears on it for User docs. Pure additions produce no DeleteCmd, so the change feed alone couldn't distinguish them from a lastLogin-style update — the cache keeps ignoring plain User updates, so lastLogin writes never churn it. - Close the user-deletion revocation gap: the Deleted DeleteCmd handler now also covers DocType.User, so a deleted user's still-valid JWT stops resolving from cache immediately instead of after the TTL. - Make the clock consistent: IdentityCacheService takes one injectable `now`, shared with BoundedTtlCache and used in computeTtlMs (was Date.now() directly). - Note in .env.example that membership/deletion invalidate promptly and residual staleness (IdP revocation, provider reassignment) is TTL-bounded. Adds tests for permissionChange (User vs non-User), the Deleted-User revocation, and injected-clock TTL/expiry. 
lastLogin write cadence is intentionally left unchanged (write-on-miss) and decoupled from invalidation, pending a separate decision. Co-Authored-By: Claude Opus 4.8 (1M context) --- api/.env.example | 3 + api/src/auth/identityCache.service.spec.ts | 76 +++++++++++++++++++++- api/src/auth/identityCache.service.ts | 38 +++++++++-- api/src/db/db.service.ts | 12 ++++ 4 files changed, 120 insertions(+), 9 deletions(-) diff --git a/api/.env.example b/api/.env.example index 96f76fdc43..12d81e6350 100644 --- a/api/.env.example +++ b/api/.env.example @@ -46,6 +46,9 @@ TRUST_PROXY=false # requests with the same token. The effective TTL is min(IDENTITY_CACHE_TTL_MS, the # token's own exp), so a cached identity is never served past expiry. MAX_ENTRIES is an # OOM guardrail (size to peak concurrent active tokens per API instance). +# Group membership changes and user deletion invalidate the cache promptly. The residual +# staleness window is bounded by the TTL — pick a lower TTL where faster propagation of +# token revocation at the IdP or a provider reassignment matters more than the cache hit rate. IDENTITY_CACHE_ENABLED=false IDENTITY_CACHE_TTL_MS=300000 IDENTITY_CACHE_MAX_ENTRIES=50000 diff --git a/api/src/auth/identityCache.service.spec.ts b/api/src/auth/identityCache.service.spec.ts index 0fde83b8a6..0748f29ba4 100644 --- a/api/src/auth/identityCache.service.spec.ts +++ b/api/src/auth/identityCache.service.spec.ts @@ -24,7 +24,7 @@ function authResult(over: Partial = {}): Identity }; } -function makeService(cfg: any) { +function makeService(cfg: any, now?: () => number) { const authIdentity = { resolveOrDefault: jest.fn().mockResolvedValue(authResult()), } as unknown as AuthIdentityService & { resolveOrDefault: jest.Mock }; @@ -36,7 +36,7 @@ function makeService(cfg: any) { // A real EventEmitter so we can drive invalidation via emit(). 
const db = new EventEmitter() as unknown as DbService; - const svc = new IdentityCacheService(authIdentity, configService, db); + const svc = new IdentityCacheService(authIdentity, configService, db, now); svc.onModuleInit(); return { svc, authIdentity, db: db as unknown as EventEmitter }; } @@ -142,7 +142,21 @@ describe("IdentityCacheService", () => { return authIdentity.resolveOrDefault.mock.calls.length === 0; }; - it("clears on a User PermissionChange DeleteCmd (revocation)", async () => { + it("clears on a User permissionChange event (membership add or remove)", async () => { + const { svc, authIdentity, db } = makeService(enabledCfg); + await seed(svc); + db.emit("permissionChange", { docType: DocType.User, docId: "user-1" }); + expect(await isHit(svc, authIdentity)).toBe(false); + }); + + it("does NOT clear on a non-User permissionChange event", async () => { + const { svc, authIdentity, db } = makeService(enabledCfg); + await seed(svc); + db.emit("permissionChange", { docType: DocType.Post, docId: "post-1" }); + expect(await isHit(svc, authIdentity)).toBe(true); + }); + + it("clears on a User PermissionChange DeleteCmd (feed-based removal)", async () => { const { svc, authIdentity, db } = makeService(enabledCfg); await seed(svc); db.emit("update", { @@ -154,6 +168,18 @@ describe("IdentityCacheService", () => { expect(await isHit(svc, authIdentity)).toBe(false); }); + it("clears on a User Deleted DeleteCmd (account deletion revocation)", async () => { + const { svc, authIdentity, db } = makeService(enabledCfg); + await seed(svc); + db.emit("update", { + type: DocType.DeleteCmd, + deleteReason: DeleteReason.Deleted, + docType: DocType.User, + docId: "user-1", + }); + expect(await isHit(svc, authIdentity)).toBe(false); + }); + it("clears on an AuthProvider update", async () => { const { svc, authIdentity, db } = makeService(enabledCfg); await seed(svc); @@ -190,4 +216,48 @@ describe("IdentityCacheService", () => { expect(await isHit(svc, 
authIdentity)).toBe(true); }); }); + + describe("clock (injected, shared with the cache)", () => { + const BASE = 1_700_000_000_000; // fixed ms base so exp is relative to the fake clock + + it("expires a hit once the injected clock passes the TTL (capped by maxAge)", async () => { + let clock = BASE; + const { svc, authIdentity } = makeService(enabledCfg, () => clock); + // Far-future exp → TTL is capped by maxAge (300000), not by expiry. + authIdentity.resolveOrDefault.mockResolvedValue( + authResult({ jwtPayload: { exp: Math.floor(BASE / 1000) + 3600 } }), + ); + + await svc.resolveOrDefault("t", "p"); // miss → cache, expiresAt = BASE + 300000 + authIdentity.resolveOrDefault.mockClear(); + + clock = BASE + 299_999; + await svc.resolveOrDefault("t", "p"); // still live → hit + expect(authIdentity.resolveOrDefault).toHaveBeenCalledTimes(0); + + clock = BASE + 300_001; + await svc.resolveOrDefault("t", "p"); // expired → re-resolve + expect(authIdentity.resolveOrDefault).toHaveBeenCalledTimes(1); + }); + + it("caps the TTL at the token's own exp when that is sooner than maxAge", async () => { + let clock = BASE; + const { svc, authIdentity } = makeService(enabledCfg, () => clock); + // exp is 10s out → TTL = min(10000, 300000) = 10000. 
+ authIdentity.resolveOrDefault.mockResolvedValue( + authResult({ jwtPayload: { exp: Math.floor(BASE / 1000) + 10 } }), + ); + + await svc.resolveOrDefault("t", "p"); // miss → cache, expiresAt = BASE + 10000 + authIdentity.resolveOrDefault.mockClear(); + + clock = BASE + 9_000; + await svc.resolveOrDefault("t", "p"); // still live → hit + expect(authIdentity.resolveOrDefault).toHaveBeenCalledTimes(0); + + clock = BASE + 10_001; + await svc.resolveOrDefault("t", "p"); // past exp → re-resolve + expect(authIdentity.resolveOrDefault).toHaveBeenCalledTimes(1); + }); + }); }); diff --git a/api/src/auth/identityCache.service.ts b/api/src/auth/identityCache.service.ts index 7935c9b9e8..161f429352 100644 --- a/api/src/auth/identityCache.service.ts +++ b/api/src/auth/identityCache.service.ts @@ -36,33 +36,53 @@ type CachedIdentity = { export class IdentityCacheService implements OnModuleInit { private readonly enabled: boolean; private readonly maxAgeMs: number; + private readonly now: () => number; private readonly cache?: BoundedTtlCache; constructor( private readonly authIdentityService: AuthIdentityService, private readonly configService: ConfigService, private readonly db: DbService, + // Injectable clock (ms). Shared with the BoundedTtlCache so TTL computation and the + // cache's expiry checks never read different clocks. Defaults to Date.now. + now: () => number = Date.now, ) { + this.now = now; const cfg = this.configService.get("identityCache"); this.enabled = !!cfg?.enabled; this.maxAgeMs = cfg?.ttlMs ?? 300000; if (this.enabled) { - this.cache = new BoundedTtlCache({ maxEntries: cfg.maxEntries }); + this.cache = new BoundedTtlCache({ + maxEntries: cfg.maxEntries, + now: this.now, + }); } } onModuleInit() { if (!this.enabled || !this.cache) return; + // Membership changes (additions AND removals) arrive as a write-side `permissionChange` + // event carrying the changed doc's type. 
We invalidate on User membership changes here + // rather than off the feed because a pure memberOf ADDITION produces no DeleteCmd — on + // the feed it is indistinguishable from a `lastLogin`-style plain User `update`, which we + // must keep ignoring (otherwise each miss's own `lastLogin` write would evict the entry). + this.db.on("permissionChange", (evt: any) => { + if (evt?.docType === DocType.User) this.cache.clear(); + }); + // Invalidate on the rare events that change resolved membership or token-verification // rules. NOT `groupUpdate`: we don't cache the accessMap (it's re-derived live), so an // ACL edit needs no flush here. A plain User `update` (e.g. the `lastLogin` write) is - // intentionally ignored — only a memberOf REMOVAL emits the PermissionChange DeleteCmd - // below, so `lastLogin` churn never evicts the cache. + // intentionally ignored — membership changes come through `permissionChange` (above) and + // user deletion/provider/mapping changes are handled below, so `lastLogin` churn never + // evicts the cache. Keeping this load-bearing lets the `lastLogin` write cadence stay a + // free, decoupled choice. this.db.on("update", (doc: any) => { if (!doc?.type) return; - // Membership revoked: a User doc lost groups → its cached (larger) access is stale. + // Membership revoked via the feed (DeleteCmd): redundant with `permissionChange` on a + // single instance, but the feed-based path also covers removals on multi-instance. if ( doc.type === DocType.DeleteCmd && doc.deleteReason === DeleteReason.PermissionChange && @@ -77,10 +97,16 @@ export class IdentityCacheService implements OnModuleInit { this.cache.clear(); return; } + // A User/provider/mapping doc was deleted. User deletion flows through the `deleteReq` + // path (no `permissionChange` emit), so this feed handler is the revocation hook for + // it: a deleted user's still-valid JWT must stop resolving from cache with stale + // (over-)access. 
if ( doc.type === DocType.DeleteCmd && doc.deleteReason === DeleteReason.Deleted && - (doc.docType === DocType.AuthProvider || doc.docType === DocType.AutoGroupMappings) + (doc.docType === DocType.AuthProvider || + doc.docType === DocType.AutoGroupMappings || + doc.docType === DocType.User) ) { this.cache.clear(); } @@ -152,7 +178,7 @@ export class IdentityCacheService implements OnModuleInit { */ private computeTtlMs(expSeconds?: number): number { if (typeof expSeconds !== "number") return this.maxAgeMs; - const untilExpiry = expSeconds * 1000 - Date.now(); + const untilExpiry = expSeconds * 1000 - this.now(); return Math.min(untilExpiry, this.maxAgeMs); } diff --git a/api/src/db/db.service.ts b/api/src/db/db.service.ts index 9ecc5feb98..4f237c3f4d 100644 --- a/api/src/db/db.service.ts +++ b/api/src/db/db.service.ts @@ -117,6 +117,7 @@ const FTS_STALE_READ = { stable: true, update: "lazy" as const }; * * @fires DbService#update - Emitted when any valid document with a type field is updated in the database * @fires DbService#groupUpdate - Emitted when a group document is updated, used by the permission system to update access maps. Also emitted with a `DeleteCmd` payload (docType === Group) when a group is deleted via the soft-delete flow, so the permission system can evict the entry. + * @fires DbService#permissionChange - Emitted (write-side, `{ docType, docId }`) right after a non-Group document's `memberOf` changes — additions and removals alike. Lets in-memory per-identity caches invalidate on membership ADDITIONS too: removals already surface via a `PermissionChange` DeleteCmd on the change feed, but a pure addition produces no DeleteCmd, so the feed alone can't distinguish it from a `lastLogin`-style update. Safe for cache eviction (single-instance); not a substitute for the feed-based handlers on multi-instance. * @fires DbService#disconnect - Emitted when a previously-established DB connection is lost. 
Consumers should drop cached DTO state that may have diverged while disconnected. * @fires DbService#reconnect - Emitted after a disconnect has been followed by a successful reconnect. Consumers that need a populated cache to function should rehydrate here. * @@ -428,6 +429,10 @@ export class DbService extends EventEmitter { let rev: string; + // Tracks whether this write changes `memberOf` (add or remove) so we can emit a + // `permissionChange` event after the write succeeds — see the `@fires` note above. + let memberOfChanged = false; + // Generate delete command if the document is set to be deleted, and delete the document if (doc.deleteReq) { await this.insertDeleteCmd({ @@ -449,6 +454,7 @@ export class DbService extends EventEmitter { doc.memberOf.sort(), ) ) { + memberOfChanged = true; await this.insertDeleteCmd({ reason: DeleteReason.PermissionChange, doc: doc as _contentBaseDto, @@ -527,6 +533,12 @@ export class DbService extends EventEmitter { const insertResult = await this.insertDoc(docPlain); insertResult.updatedTimeUtc = docPlain.updatedTimeUtc; insertResult.changes = changes; + + // Notify in-memory per-identity caches of a membership change once the write is committed. + if (memberOfChanged) { + this.emit("permissionChange", { docType: doc.type, docId: doc._id }); + } + return insertResult; } From 5687354e4aafbe66dd6a93fe9865ae18301abdad Mon Sep 17 00:00:00 2001 From: Dirk Date: Fri, 19 Jun 2026 10:35:52 +0200 Subject: [PATCH 3/3] fix(api): harden per-token auth identity cache invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the opt-in identity cache, addressing review findings: - Group additions no longer lag: DbService.upsertDoc emits a write-side `permissionChange` event on any non-Group memberOf change (add or remove); IdentityCacheService clears on it for User docs. 
A pure addition produces no DeleteCmd, so the change feed alone couldn't distinguish it from a lastLogin-style update — the cache keeps ignoring plain User updates, so lastLogin writes never churn it. - Close the user-deletion revocation gap: the Deleted DeleteCmd handler now also covers DocType.User, so a deleted user's still-valid JWT stops resolving from cache immediately instead of after the TTL. - Make the clock consistent: IdentityCacheService takes one injectable `now`, shared with BoundedTtlCache and used in computeTtlMs (was Date.now() directly). Marked `@Optional()` so Nest DI leaves it unresolved and the Date.now default applies — without it the provider fails to instantiate (app boot and every DB-backed test module). - Note in .env.example that membership/deletion invalidate promptly and residual staleness (IdP revocation, provider reassignment) is TTL-bounded. Adds tests for permissionChange (User vs non-User), the Deleted-User revocation, and injected-clock TTL/expiry. lastLogin write cadence is intentionally left unchanged (write-on-miss) and decoupled from invalidation, pending a separate decision. Co-Authored-By: Claude Opus 4.8 (1M context) --- api/src/auth/identityCache.service.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/src/auth/identityCache.service.ts b/api/src/auth/identityCache.service.ts index 161f429352..eac848276d 100644 --- a/api/src/auth/identityCache.service.ts +++ b/api/src/auth/identityCache.service.ts @@ -1,4 +1,4 @@ -import { Injectable, OnModuleInit } from "@nestjs/common"; +import { Injectable, Optional, OnModuleInit } from "@nestjs/common"; import { ConfigService } from "@nestjs/config"; import { createHash } from "crypto"; import { DbService } from "../db/db.service"; @@ -44,8 +44,8 @@ export class IdentityCacheService implements OnModuleInit { private readonly configService: ConfigService, private readonly db: DbService, // Injectable clock (ms). 
Shared with the BoundedTtlCache so TTL computation and the - // cache's expiry checks never read different clocks. Defaults to Date.now. - now: () => number = Date.now, + // cache's expiry checks never read different clocks. `@Optional()` so Nest DI leaves it unresolved and the Date.now default applies. + @Optional() now: () => number = Date.now, ) { this.now = now; const cfg = this.configService.get("identityCache");