diff --git a/.env.example b/.env.example index 5196abaf..28e67e7b 100644 --- a/.env.example +++ b/.env.example @@ -13,6 +13,15 @@ INTERNAL_COMMAND_TOKEN="some-made-up-token" # default to allow indexing for seo safety ALLOW_INDEXING="true" +# server perf logging +PERF_LOGGING="false" +PERF_SLOW_THRESHOLD_MS="250" +PERF_LOG_SAMPLE_RATE="0.05" +PERF_LOG_SKIP_PATHS="/assets,/build,/favicons,/img,/fonts" + +# prisma slow query logging threshold (ms) +PRISMA_QUERY_LOG_THRESHOLD_MS="200" + TWILIO_TOKEN="your_twilio_token" TWILIO_SID="your_twilio_sid" diff --git a/docs/server-performance-audit.md b/docs/server-performance-audit.md new file mode 100644 index 00000000..28b983a6 --- /dev/null +++ b/docs/server-performance-audit.md @@ -0,0 +1,54 @@ +# Server performance audit + +## Quick start + +- Start the app: `bun run dev` +- Benchmark HTTP endpoints: `bun run benchmark:server` +- Benchmark database-heavy flows: `bun run benchmark:perf` +- Seed larger datasets (optional): `bun run benchmark:data` + +## Perf logging (server) + +Enable request timing logs: + +- `PERF_LOGGING=true` +- `PERF_SLOW_THRESHOLD_MS=250` +- `PERF_LOG_SAMPLE_RATE=0.05` +- `PERF_LOG_SKIP_PATHS=/assets,/build,/favicons,/img,/fonts` + +Each log line is prefixed with `server-perf:` and contains JSON with request +duration, CPU usage, event loop utilization, and memory usage. + +## Benchmark server endpoints + +``` +bun run benchmark:server --base-url http://localhost:3000 --iterations 20 --concurrency 4 +``` + +Options: + +- `--route /path` (repeatable, overrides defaults) +- `--timeout 10000` (ms) +- `--output ./tmp/benchmark-server.json` + +## Benchmark database paths + +``` +bun run benchmark:perf --iterations 5 +``` + +This measures: + +- Recipients list query + schedule computation +- Past messages counts + page fetch +- Cron query + schedule computation + +## Audit checklist + +1. Compare baseline vs post-deps upgrade using `benchmark:server` and `benchmark:perf`. +2. Enable `PERF_LOGGING` and look for slow routes or high event loop utilization. +3. Check Prisma slow query logs (`PRISMA_QUERY_LOG_THRESHOLD_MS`) and compare to + request timing logs. +4. Validate cron query performance (`benchmark:perf` -> `cron`). +5. Profile CPU (Node `--cpu-prof`) during a `benchmark:server` run if hot paths + are not obvious. diff --git a/package.json b/package.json index 1358de9f..d4210c94 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,7 @@ "typecheck": "react-router typegen && tsc", "benchmark:data": "bun ./scripts/benchmark-data-load.ts", "benchmark:perf": "bun ./scripts/benchmark-performance.ts", + "benchmark:server": "bun ./scripts/benchmark-server.ts", "backfill:recipient-schedules": "bun ./scripts/backfill-recipient-schedules.ts", "validate": "run-p test lint typecheck test:e2e:run" }, diff --git a/scripts/benchmark-server.ts b/scripts/benchmark-server.ts new file mode 100644 index 00000000..f568ab20 --- /dev/null +++ b/scripts/benchmark-server.ts @@ -0,0 +1,258 @@ +import crypto from 'node:crypto' +import fs from 'node:fs' +import { performance } from 'node:perf_hooks' +import { parseArgs } from 'node:util' + +type Summary = { + min: number + max: number + avg: number + median: number + p90: number + p95: number + p99: number +} + +type RequestResult = { + durationMs: number + status: number | null + serverTiming: string | null + error?: string +} + +type RouteReport = { + summary: Summary + statuses: Record + errors: Record + samples: { serverTiming: string[] } +} + +const DEFAULT_ROUTES = [ + '/', + '/login', + '/signup', + '/resources/healthcheck', +] + +const toNumber = (value: string | undefined, fallback: number) => { + if (!value) return fallback + const parsed = Number(value) + return Number.isFinite(parsed) ? parsed : fallback +} + +function percentile(sorted: number[], p: number) { + if (sorted.length === 0) return 0 + const index = Math.ceil((p / 100) * sorted.length) - 1 + const clampedIndex = Math.max(0, Math.min(sorted.length - 1, index)) + return sorted[clampedIndex] ?? 0 +} + +function summarize(values: number[]): Summary { + const sorted = [...values].sort((a, b) => a - b) + const total = values.reduce((acc, value) => acc + value, 0) + const avg = values.length ? total / values.length : 0 + const median = + sorted.length === 0 + ? 0 + : sorted.length % 2 === 0 + ? (sorted[sorted.length / 2 - 1]! + sorted[sorted.length / 2]!) / 2 + : sorted[Math.floor(sorted.length / 2)]! + + return { + min: sorted[0] ?? 0, + max: sorted[sorted.length - 1] ?? 0, + avg, + median, + p90: percentile(sorted, 90), + p95: percentile(sorted, 95), + p99: percentile(sorted, 99), + } +} + +function parseOptions() { + const { values } = parseArgs({ + options: { + 'base-url': { type: 'string' }, + iterations: { type: 'string', short: 'i' }, + concurrency: { type: 'string', short: 'c' }, + timeout: { type: 'string', short: 't' }, + warmup: { type: 'boolean' }, + route: { type: 'string', multiple: true }, + output: { type: 'string' }, + }, + allowPositionals: true, + }) + + const baseUrl = + values['base-url'] ?? process.env.BENCH_BASE_URL ?? 'http://localhost:3000' + const iterations = Math.max( + 1, + Math.floor( + toNumber(values.iterations, toNumber(process.env.BENCH_ITERATIONS, 20)), + ), + ) + const concurrency = Math.max( + 1, + Math.floor( + toNumber(values.concurrency, toNumber(process.env.BENCH_CONCURRENCY, 4)), + ), + ) + const timeoutMs = Math.max( + 100, + Math.floor( + toNumber(values.timeout, toNumber(process.env.BENCH_TIMEOUT_MS, 10000)), + ), + ) + const routes = values.route?.length + ? values.route + : (process.env.BENCH_ROUTES ?? '') + .split(',') + .map((route) => route.trim()) + .filter(Boolean) + + return { + baseUrl, + iterations, + concurrency, + timeoutMs, + warmup: values.warmup ?? true, + routes: routes.length ? routes : DEFAULT_ROUTES, + output: values.output ?? process.env.BENCH_OUTPUT ?? null, + } +} + +async function requestOnce( + baseUrl: string, + route: string, + timeoutMs: number, + perfRunId: string, +): Promise { + const controller = new AbortController() + const timeout = setTimeout(() => controller.abort(), timeoutMs) + const start = performance.now() + + try { + const response = await fetch(`${baseUrl}${route}`, { + signal: controller.signal, + headers: { + 'x-perf-run-id': perfRunId, + 'cache-control': 'no-store', + }, + }) + const serverTiming = response.headers.get('server-timing') + await response.arrayBuffer() + return { + durationMs: performance.now() - start, + status: response.status, + serverTiming, + } + } catch (error) { + const message = error instanceof Error ? error.message : 'unknown error' + return { + durationMs: performance.now() - start, + status: null, + serverTiming: null, + error: message, + } + } finally { + clearTimeout(timeout) + } +} + +async function runRouteBenchmark( + baseUrl: string, + route: string, + iterations: number, + concurrency: number, + timeoutMs: number, + perfRunId: string, +): Promise { + const results: RequestResult[] = [] + let index = 0 + + const workers = Array.from({ length: concurrency }, async () => { + while (index < iterations) { + const current = index + index += 1 + if (current >= iterations) break + results.push(await requestOnce(baseUrl, route, timeoutMs, perfRunId)) + } + }) + + await Promise.all(workers) + + const durations = results + .filter((result) => result.status !== null) + .map((result) => result.durationMs) + const statusCounts: Record = {} + const errorCounts: Record = {} + const serverTimingSamples: string[] = [] + + for (const result of results) { + if (result.status !== null) { + const key = String(result.status) + statusCounts[key] = (statusCounts[key] ?? 0) + 1 + } + if (result.error) { + errorCounts[result.error] = (errorCounts[result.error] ?? 0) + 1 + } + if (result.serverTiming && serverTimingSamples.length < 5) { + serverTimingSamples.push(result.serverTiming) + } + } + + return { + summary: summarize(durations), + statuses: statusCounts, + errors: errorCounts, + samples: { serverTiming: serverTimingSamples }, + } +} + +async function run() { + const options = parseOptions() + const perfRunId = crypto.randomUUID() + + const report: Record = { + meta: { + timestamp: new Date().toISOString(), + baseUrl: options.baseUrl, + iterations: options.iterations, + concurrency: options.concurrency, + timeoutMs: options.timeoutMs, + warmup: options.warmup, + perfRunId, + }, + routes: {}, + } + + if (options.warmup) { + for (const route of options.routes) { + await requestOnce(options.baseUrl, route, options.timeoutMs, perfRunId) + } + } + + for (const route of options.routes) { + const result = await runRouteBenchmark( + options.baseUrl, + route, + options.iterations, + options.concurrency, + options.timeoutMs, + perfRunId, + ) + ;(report.routes as Record)[route] = result + } + + const output = JSON.stringify(report, null, 2) + console.log(output) + + if (options.output) { + fs.writeFileSync(options.output, output) + } +} + +await run().catch((error) => { + console.error('Benchmark failed:', error) + process.exitCode = 1 +}) diff --git a/server/index.ts b/server/index.ts index d6835627..91983b33 100644 --- a/server/index.ts +++ b/server/index.ts @@ -10,6 +10,10 @@ import rateLimit, { ipKeyGenerator } from 'express-rate-limit' import getPort, { portNumbers } from 'get-port' import helmet from 'helmet' import morgan from 'morgan' +import { + createPerfLogger, + getPerfLoggerOptionsFromEnv, +} from './utils/perf-logger.ts' const MODE = process.env.NODE_ENV ?? 'development' const IS_PROD = MODE === 'production' @@ -261,6 +265,10 @@ if (SENTRY_ENABLED) { } const app = express() +const perfLogger = createPerfLogger(getPerfLoggerOptionsFromEnv()) +if (perfLogger) { + app.use(perfLogger) +} const getHost = (req: { get: (key: string) => string | undefined }) => req.get('X-Forwarded-Host') ?? req.get('host') ?? '' diff --git a/server/utils/perf-logger.ts b/server/utils/perf-logger.ts new file mode 100644 index 00000000..40b0a669 --- /dev/null +++ b/server/utils/perf-logger.ts @@ -0,0 +1,140 @@ +import crypto from 'node:crypto' +import { performance } from 'node:perf_hooks' +import type { NextFunction, Request, Response } from 'express' + +type PerfLoggerOptions = { + enabled: boolean + sampleRate: number + slowThresholdMs: number + skipPaths: string[] +} + +type PerfLogEntry = { + requestId: string + perfRunId?: string + method: string + path: string + status: number + outcome: 'finish' | 'close' + durationMs: number + cpuMs: { user: number; system: number } + eventLoop: { utilization: number; activeMs: number; idleMs: number } + memoryMb: { rss: number; heapUsed: number } + contentLength: number | null + timestamp: string +} + +const defaultSkipPaths = ['/assets', '/build', '/favicons', '/img', '/fonts'] + +const toNumber = (value: string | undefined, fallback: number) => { + if (!value) return fallback + const parsed = Number(value) + return Number.isFinite(parsed) ? parsed : fallback +} + +const clamp = (value: number, min: number, max: number) => + Math.min(max, Math.max(min, value)) + +const round = (value: number, digits = 2) => { + const factor = 10 ** digits + return Math.round(value * factor) / factor +} + +const toMb = (bytes: number) => round(bytes / 1024 / 1024, 2) + +export function getPerfLoggerOptionsFromEnv(): PerfLoggerOptions { + const enabled = process.env.PERF_LOGGING === 'true' + const sampleRate = clamp( + toNumber(process.env.PERF_LOG_SAMPLE_RATE, 0.05), + 0, + 1, + ) + const slowThresholdMs = Math.max( + 0, + toNumber(process.env.PERF_SLOW_THRESHOLD_MS, 250), + ) + const envSkipPaths = (process.env.PERF_LOG_SKIP_PATHS ?? '') + .split(',') + .map((value) => value.trim()) + .filter(Boolean) + + return { + enabled, + sampleRate, + slowThresholdMs, + skipPaths: envSkipPaths.length ? envSkipPaths : defaultSkipPaths, + } +} + +export function createPerfLogger(options: PerfLoggerOptions) { + if (!options.enabled) return null + + return (req: Request, res: Response, next: NextFunction) => { + const path = req.path + if (options.skipPaths.some((prefix) => path.startsWith(prefix))) { + next() + return + } + + const requestId = crypto.randomUUID() + const perfRunId = req.get('x-perf-run-id') ?? undefined + const start = performance.now() + const cpuStart = process.cpuUsage() + const eluStart = performance.eventLoopUtilization() + let completed = false + + res.setHeader('x-request-id', requestId) + + const onDone = (outcome: 'finish' | 'close') => { + if (completed) return + completed = true + + const durationMs = performance.now() - start + const shouldLog = + durationMs >= options.slowThresholdMs || + res.statusCode >= 500 || + Math.random() < options.sampleRate + + if (!shouldLog) return + + const cpuUsage = process.cpuUsage(cpuStart) + const elu = performance.eventLoopUtilization(eluStart) + const memory = process.memoryUsage() + const contentLengthHeader = res.getHeader('content-length') + const contentLength = contentLengthHeader + ? Number(contentLengthHeader) + : null + + const entry: PerfLogEntry = { + requestId, + perfRunId, + method: req.method, + path, + status: res.statusCode, + outcome, + durationMs: round(durationMs, 2), + cpuMs: { + user: round(cpuUsage.user / 1000, 2), + system: round(cpuUsage.system / 1000, 2), + }, + eventLoop: { + utilization: round(elu.utilization, 3), + activeMs: round(elu.active, 2), + idleMs: round(elu.idle, 2), + }, + memoryMb: { + rss: toMb(memory.rss), + heapUsed: toMb(memory.heapUsed), + }, + contentLength: Number.isFinite(contentLength) ? contentLength : null, + timestamp: new Date().toISOString(), + } + + console.info(`server-perf: ${JSON.stringify(entry)}`) + } + + res.once('finish', () => onDone('finish')) + res.once('close', () => onDone('close')) + next() + } +}