diff --git a/.changeset/ninety-cows-lay.md b/.changeset/ninety-cows-lay.md new file mode 100644 index 0000000000..67e588ec94 --- /dev/null +++ b/.changeset/ninety-cows-lay.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +feat(sdk): Support debouncing runs when triggering with new debounce options diff --git a/.cursor/rules/migrations.mdc b/.cursor/rules/migrations.mdc new file mode 100644 index 0000000000..370c87c051 --- /dev/null +++ b/.cursor/rules/migrations.mdc @@ -0,0 +1,6 @@ +--- +description: how to create and apply database migrations +alwaysApply: false +--- + +Follow our [migrations.md](mdc:ai/references/migrations.md) guide for how to create and apply database migrations. diff --git a/ai/references/migrations.md b/ai/references/migrations.md new file mode 100644 index 0000000000..c6fbf79e9d --- /dev/null +++ b/ai/references/migrations.md @@ -0,0 +1,121 @@ +## Creating and applying migrations + +We use Prisma migrations to manage the database schema. Please follow these steps when editing the `internal-packages/database/prisma/schema.prisma` file: + +Edit the `schema.prisma` file to add or modify the schema. + +Create a new migration file but don't apply it yet: + +```bash +cd internal-packages/database +pnpm run db:migrate:dev:create --name "add_new_column_to_table" +``` + +The migration file will be created in the `prisma/migrations` directory, but it will contain a number of statements unrelated to your change that must be removed before the migration can be applied. Here's an example of what the migration file might look like: + +```sql +-- AlterEnum +ALTER TYPE "public"."TaskRunExecutionStatus" ADD VALUE 'DELAYED'; + +-- AlterTable +ALTER TABLE "public"."TaskRun" ADD COLUMN "debounce" JSONB; + +-- AlterTable +ALTER TABLE "public"."_BackgroundWorkerToBackgroundWorkerFile" ADD CONSTRAINT "_BackgroundWorkerToBackgroundWorkerFile_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_BackgroundWorkerToBackgroundWorkerFile_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_BackgroundWorkerToTaskQueue" ADD CONSTRAINT "_BackgroundWorkerToTaskQueue_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_BackgroundWorkerToTaskQueue_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_TaskRunToTaskRunTag" ADD CONSTRAINT "_TaskRunToTaskRunTag_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_TaskRunToTaskRunTag_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_WaitpointRunConnections" ADD CONSTRAINT "_WaitpointRunConnections_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_WaitpointRunConnections_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_completedWaitpoints" ADD CONSTRAINT "_completedWaitpoints_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_completedWaitpoints_AB_unique"; + +-- CreateIndex +CREATE INDEX "SecretStore_key_idx" ON "public"."SecretStore"("key" text_pattern_ops); + +-- CreateIndex +CREATE INDEX "TaskRun_runtimeEnvironmentId_id_idx" ON "public"."TaskRun"("runtimeEnvironmentId", "id" DESC); + +-- CreateIndex +CREATE INDEX "TaskRun_runtimeEnvironmentId_createdAt_idx" ON "public"."TaskRun"("runtimeEnvironmentId", "createdAt" DESC); +``` + +All the following lines should be removed: + +```sql +-- AlterTable +ALTER TABLE "public"."_BackgroundWorkerToBackgroundWorkerFile" ADD CONSTRAINT "_BackgroundWorkerToBackgroundWorkerFile_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX 
"public"."_BackgroundWorkerToBackgroundWorkerFile_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_BackgroundWorkerToTaskQueue" ADD CONSTRAINT "_BackgroundWorkerToTaskQueue_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_BackgroundWorkerToTaskQueue_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_TaskRunToTaskRunTag" ADD CONSTRAINT "_TaskRunToTaskRunTag_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_TaskRunToTaskRunTag_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_WaitpointRunConnections" ADD CONSTRAINT "_WaitpointRunConnections_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_WaitpointRunConnections_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_completedWaitpoints" ADD CONSTRAINT "_completedWaitpoints_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_completedWaitpoints_AB_unique"; + +-- CreateIndex +CREATE INDEX "SecretStore_key_idx" ON "public"."SecretStore"("key" text_pattern_ops); + +-- CreateIndex +CREATE INDEX "TaskRun_runtimeEnvironmentId_id_idx" ON "public"."TaskRun"("runtimeEnvironmentId", "id" DESC); + +-- CreateIndex +CREATE INDEX "TaskRun_runtimeEnvironmentId_createdAt_idx" ON "public"."TaskRun"("runtimeEnvironmentId", "createdAt" DESC); +``` + +Leaving only this: + +```sql +-- AlterEnum +ALTER TYPE "public"."TaskRunExecutionStatus" ADD VALUE 'DELAYED'; + +-- AlterTable +ALTER TABLE "public"."TaskRun" ADD COLUMN "debounce" JSONB; +``` + +After editing the migration file, apply the migration: + +```bash +cd internal-packages/database +pnpm run db:migrate:deploy && pnpm run generate +``` diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index c5dcbe0520..1cc0db0bf0 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -611,6 +611,12 @@ const EnvironmentSchema = z .default(60_000), RUN_ENGINE_SUSPENDED_HEARTBEAT_RETRIES_FACTOR: z.coerce.number().default(2), + /** Maximum duration in milliseconds that a run can be debounced. Default: 1 hour (3,600,000ms) */ + RUN_ENGINE_MAXIMUM_DEBOUNCE_DURATION_MS: z.coerce + .number() + .int() + .default(60_000 * 60), // 1 hour + RUN_ENGINE_WORKER_REDIS_HOST: z .string() .optional() diff --git a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts index bf43f40525..4c0e3405cf 100644 --- a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts @@ -234,6 +234,7 @@ export class SpanPresenter extends BasePresenter { environmentId: run.runtimeEnvironment.id, idempotencyKey: run.idempotencyKey, idempotencyKeyExpiresAt: run.idempotencyKeyExpiresAt, + debounce: run.debounce as { key: string; delay: string; createdAt: Date } | null, schedule: await this.resolveSchedule(run.scheduleId ?? 
undefined), queue: { name: run.queue, @@ -357,6 +358,8 @@ export class SpanPresenter extends BasePresenter { //idempotency idempotencyKey: true, idempotencyKeyExpiresAt: true, + //debounce + debounce: true, //delayed delayUntil: true, //ttl diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route.tsx b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route.tsx index c957653fd8..5e5d2ca3f6 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route.tsx +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route.tsx @@ -556,6 +556,19 @@ function RunBody({ )} + + Debounce + + {run.debounce ? ( +
+
Key: {run.debounce.key}
+
Delay: {run.debounce.delay}
+
+ ) : ( + "–" + )} +
+
Version diff --git a/apps/webapp/app/runEngine/concerns/traceEvents.server.ts b/apps/webapp/app/runEngine/concerns/traceEvents.server.ts index 634df34e4a..cb2eaa30a5 100644 --- a/apps/webapp/app/runEngine/concerns/traceEvents.server.ts +++ b/apps/webapp/app/runEngine/concerns/traceEvents.server.ts @@ -51,6 +51,7 @@ export class DefaultTraceEventsConcern implements TraceEventConcern { traceparent, setAttribute: (key, value) => event.setAttribute(key as any, value), failWithError: event.failWithError.bind(event), + stop: event.stop.bind(event), }, store ); @@ -116,6 +117,73 @@ export class DefaultTraceEventsConcern implements TraceEventConcern { traceparent, setAttribute: (key, value) => event.setAttribute(key as any, value), failWithError: event.failWithError.bind(event), + stop: event.stop.bind(event), + }, + store + ); + } + ); + } + + async traceDebouncedRun( + request: TriggerTaskRequest, + parentStore: string | undefined, + options: { + existingRun: TaskRun; + debounceKey: string; + incomplete: boolean; + isError: boolean; + }, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise { + const { existingRun, debounceKey, incomplete, isError } = options; + const { repository, store } = await this.#getEventRepository(request, parentStore); + + return await repository.traceEvent( + `${request.taskId} (debounced)`, + { + context: request.options?.traceContext, + spanParentAsLink: request.options?.spanParentAsLink, + kind: "SERVER", + environment: request.environment, + taskSlug: request.taskId, + attributes: { + properties: { + [SemanticInternalAttributes.ORIGINAL_RUN_ID]: existingRun.friendlyId, + }, + style: { + icon: "task-cached", + }, + runId: existingRun.friendlyId, + }, + incomplete, + isError, + immediate: true, + }, + async (event, traceContext, traceparent) => { + // Log a message about the debounced trigger + await repository.recordEvent( + `Debounced: using existing run with key "${debounceKey}"`, + { + taskSlug: request.taskId, + environment: request.environment, + attributes: { + runId: existingRun.friendlyId, + }, + context: request.options?.traceContext, + parentId: event.spanId, + } + ); + + return await callback( + { + traceId: event.traceId, + spanId: event.spanId, + traceContext, + traceparent, + setAttribute: (key, value) => event.setAttribute(key as any, value), + failWithError: event.failWithError.bind(event), + stop: event.stop.bind(event), }, store ); diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index f2e9ed5502..ab32682811 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -160,10 +160,34 @@ export class RunEngineTriggerTaskService { } } - const [parseDelayError, delayUntil] = await tryCatch(parseDelay(body.options?.delay)); + // Parse delay from either explicit delay option or debounce.delay + const delaySource = body.options?.delay ?? body.options?.debounce?.delay; + const [parseDelayError, delayUntil] = await tryCatch(parseDelay(delaySource)); if (parseDelayError) { - throw new ServiceValidationError(`Invalid delay ${body.options?.delay}`); + throw new ServiceValidationError(`Invalid delay ${delaySource}`); + } + + // Validate debounce options + if (body.options?.debounce) { + if (!delayUntil) { + throw new ServiceValidationError( + `Debounce requires a valid delay duration. 
Provided: ${body.options.debounce.delay}` + ); + } + + // Always validate debounce.delay separately since it's used for rescheduling + // This catches the case where options.delay is valid but debounce.delay is invalid + const [debounceDelayError, debounceDelayUntil] = await tryCatch( + parseDelay(body.options.debounce.delay) + ); + + if (debounceDelayError || !debounceDelayUntil) { + throw new ServiceValidationError( + `Invalid debounce delay: ${body.options.debounce.delay}. ` + + `Supported formats: {number}s, {number}m, {number}h, {number}d, {number}w` + ); + } } const ttl = @@ -340,10 +364,48 @@ export class RunEngineTriggerTaskService { bulkActionId: body.options?.bulkActionId, planType, realtimeStreamsVersion: options.realtimeStreamsVersion, + debounce: body.options?.debounce, + // When debouncing with triggerAndWait, create a span for the debounced trigger + onDebounced: + body.options?.debounce && body.options?.resumeParentOnCompletion + ? async ({ existingRun, waitpoint, debounceKey }) => { + return await this.traceEventConcern.traceDebouncedRun( + triggerRequest, + parentRun?.taskEventStore, + { + existingRun, + debounceKey, + incomplete: waitpoint.status === "PENDING", + isError: waitpoint.outputIsError, + }, + async (spanEvent) => { + const spanId = + options?.parentAsLinkType === "replay" + ? spanEvent.spanId + : spanEvent.traceparent?.spanId + ? `${spanEvent.traceparent.spanId}:${spanEvent.spanId}` + : spanEvent.spanId; + return spanId; + } + ); + } + : undefined, }, this.prisma ); + // If the returned run has a different friendlyId, it was debounced. + // For triggerAndWait: stop the outer span since a replacement debounced span was created via onDebounced. + // For regular trigger: let the span complete normally - no replacement span needed since the + // original run already has its span from when it was first created. + if ( + taskRun.friendlyId !== runFriendlyId && + body.options?.debounce && + body.options?.resumeParentOnCompletion + ) { + event.stop(); + } + const error = taskRun.error ? TaskRunError.parse(taskRun.error) : undefined; if (error) { diff --git a/apps/webapp/app/runEngine/types.ts b/apps/webapp/app/runEngine/types.ts index 03fa7a322f..7186d81ff9 100644 --- a/apps/webapp/app/runEngine/types.ts +++ b/apps/webapp/app/runEngine/types.ts @@ -131,6 +131,12 @@ export type TracedEventSpan = { }; setAttribute: (key: string, value: string) => void; failWithError: (error: TaskRunError) => void; + /** + * Stop the span without writing any event. + * Used when a debounced run is returned - the span for the debounced + * trigger is created separately via traceDebouncedRun. + */ + stop: () => void; }; export interface TraceEventConcern { @@ -150,6 +156,17 @@ export interface TraceEventConcern { }, callback: (span: TracedEventSpan, store: string) => Promise ): Promise; + traceDebouncedRun( + request: TriggerTaskRequest, + parentStore: string | undefined, + options: { + existingRun: TaskRun; + debounceKey: string; + incomplete: boolean; + isError: boolean; + }, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise; } export type TriggerRacepoints = "idempotencyKey"; diff --git a/apps/webapp/app/v3/runEngine.server.ts b/apps/webapp/app/v3/runEngine.server.ts index db1760755c..5f88d5f6a4 100644 --- a/apps/webapp/app/v3/runEngine.server.ts +++ b/apps/webapp/app/v3/runEngine.server.ts @@ -182,6 +182,10 @@ function createRunEngine() { ? 
createBatchGlobalRateLimiter(env.BATCH_QUEUE_GLOBAL_RATE_LIMIT) : undefined, }, + // Debounce configuration + debounce: { + maxDebounceDurationMs: env.RUN_ENGINE_MAXIMUM_DEBOUNCE_DURATION_MS, + }, }); return engine; diff --git a/apps/webapp/package.json b/apps/webapp/package.json index 9d732eb17f..45de003c8d 100644 --- a/apps/webapp/package.json +++ b/apps/webapp/package.json @@ -14,7 +14,7 @@ "lint": "eslint --cache --cache-location ./node_modules/.cache/eslint .", "start": "cross-env NODE_ENV=production node --max-old-space-size=8192 ./build/server.js", "start:local": "cross-env node --max-old-space-size=8192 ./build/server.js", - "typecheck": "tsc --noEmit -p ./tsconfig.check.json", + "typecheck": "cross-env NODE_OPTIONS=\"--max-old-space-size=8192\" tsc --noEmit -p ./tsconfig.check.json", "db:seed": "tsx seed.mts", "upload:sourcemaps": "bash ./upload-sourcemaps.sh", "test": "vitest --no-file-parallelism", @@ -288,4 +288,4 @@ "engines": { "node": ">=18.19.0 || >=20.6.0" } -} +} \ No newline at end of file diff --git a/apps/webapp/test/engine/triggerTask.test.ts b/apps/webapp/test/engine/triggerTask.test.ts index 463e6e71df..0306c6f235 100644 --- a/apps/webapp/test/engine/triggerTask.test.ts +++ b/apps/webapp/test/engine/triggerTask.test.ts @@ -80,6 +80,7 @@ class MockTraceEventConcern implements TraceEventConcern { traceparent: undefined, setAttribute: () => {}, failWithError: () => {}, + stop: () => {}, }, "test" ); @@ -104,6 +105,32 @@ class MockTraceEventConcern implements TraceEventConcern { traceparent: undefined, setAttribute: () => {}, failWithError: () => {}, + stop: () => {}, + }, + "test" + ); + } + + async traceDebouncedRun( + request: TriggerTaskRequest, + parentStore: string | undefined, + options: { + existingRun: TaskRun; + debounceKey: string; + incomplete: boolean; + isError: boolean; + }, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise { + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + stop: () => {}, }, "test" ); @@ -910,4 +937,239 @@ describe("RunEngineTriggerTaskService", () => { await engine.quit(); } ); + + containerTest( + "should reject invalid debounce.delay when no explicit delay is provided", + async ({ prisma, redisOptions }) => { + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0005, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + const queuesManager = new DefaultQueueManager(prisma, engine); + const idempotencyKeyConcern = new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ); + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: queuesManager, + idempotencyKeyConcern, + validator: new MockTriggerTaskValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + 
metadataMaximumSize: 1024 * 1024 * 1, + }); + + // Invalid debounce.delay format (ms not supported) + await expect( + triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { + payload: { test: "test" }, + options: { + debounce: { + key: "test-key", + delay: "300ms", // Invalid - ms not supported + }, + }, + }, + }) + ).rejects.toThrow("Debounce requires a valid delay duration"); + + await engine.quit(); + } + ); + + containerTest( + "should reject invalid debounce.delay even when explicit delay is valid", + async ({ prisma, redisOptions }) => { + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0005, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + const queuesManager = new DefaultQueueManager(prisma, engine); + const idempotencyKeyConcern = new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ); + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: queuesManager, + idempotencyKeyConcern, + validator: new MockTriggerTaskValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + }); + + // Valid explicit delay but invalid debounce.delay + // This is the bug case: the explicit delay passes validation, + // but debounce.delay would fail later when rescheduling + await expect( + triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { + payload: { test: "test" }, + options: { + delay: "5m", // Valid explicit delay + debounce: { + key: "test-key", + delay: "invalid-delay", // Invalid debounce delay + }, + }, + }, + }) + ).rejects.toThrow("Invalid debounce delay"); + + await engine.quit(); + } + ); + + containerTest( + "should accept valid debounce.delay formats", + async ({ prisma, redisOptions }) => { + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0005, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + const queuesManager = new DefaultQueueManager(prisma, engine); + const idempotencyKeyConcern = new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ); + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: queuesManager, + 
idempotencyKeyConcern, + validator: new MockTriggerTaskValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + }); + + // Valid debounce.delay format + const result = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { + payload: { test: "test" }, + options: { + debounce: { + key: "test-key", + delay: "5s", // Valid format + }, + }, + }, + }); + + expect(result).toBeDefined(); + expect(result?.run.friendlyId).toBeDefined(); + + await engine.quit(); + } + ); }); diff --git a/internal-packages/database/prisma/migrations/20251216225303_add_debounce_and_delayed_status/migration.sql b/internal-packages/database/prisma/migrations/20251216225303_add_debounce_and_delayed_status/migration.sql new file mode 100644 index 0000000000..407c3c856d --- /dev/null +++ b/internal-packages/database/prisma/migrations/20251216225303_add_debounce_and_delayed_status/migration.sql @@ -0,0 +1,5 @@ +-- AlterEnum +ALTER TYPE "public"."TaskRunExecutionStatus" ADD VALUE 'DELAYED'; + +-- AlterTable +ALTER TABLE "public"."TaskRun" ADD COLUMN "debounce" JSONB; \ No newline at end of file diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 5207ada480..28c8332966 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -581,6 +581,10 @@ model TaskRun { idempotencyKey String? idempotencyKeyExpiresAt DateTime? + + /// Debounce options: { key: string, delay: string, createdAt: Date } + debounce Json? + taskIdentifier String isTest Boolean @default(false) @@ -947,6 +951,8 @@ model TaskRunExecutionSnapshot { enum TaskRunExecutionStatus { /// Run has been created RUN_CREATED + /// Run is delayed, waiting to be enqueued + DELAYED /// Run is in the RunQueue QUEUED /// Run is in the RunQueue, and is also executing. This happens when a run is continued cannot reacquire concurrency diff --git a/internal-packages/run-engine/src/engine/index.ts b/internal-packages/run-engine/src/engine/index.ts index 9bd495f327..1b53d6378d 100644 --- a/internal-packages/run-engine/src/engine/index.ts +++ b/internal-packages/run-engine/src/engine/index.ts @@ -47,6 +47,7 @@ import { RunLocker } from "./locking.js"; import { getFinalRunStatuses } from "./statuses.js"; import { BatchSystem } from "./systems/batchSystem.js"; import { CheckpointSystem } from "./systems/checkpointSystem.js"; +import { DebounceSystem } from "./systems/debounceSystem.js"; import { DelayedRunSystem } from "./systems/delayedRunSystem.js"; import { DequeueSystem } from "./systems/dequeueSystem.js"; import { EnqueueSystem } from "./systems/enqueueSystem.js"; @@ -95,6 +96,7 @@ export class RunEngine { enqueueSystem: EnqueueSystem; checkpointSystem: CheckpointSystem; delayedRunSystem: DelayedRunSystem; + debounceSystem: DebounceSystem; ttlSystem: TtlSystem; pendingVersionSystem: PendingVersionSystem; raceSimulationSystem: RaceSimulationSystem = new RaceSimulationSystem(); @@ -297,6 +299,14 @@ export class RunEngine { enqueueSystem: this.enqueueSystem, }); + this.debounceSystem = new DebounceSystem({ + resources, + redis: options.debounce?.redis ?? options.runLock.redis, + executionSnapshotSystem: this.executionSnapshotSystem, + delayedRunSystem: this.delayedRunSystem, + maxDebounceDurationMs: options.debounce?.maxDebounceDurationMs ?? 
60 * 60 * 1000, // Default 1 hour + }); + this.pendingVersionSystem = new PendingVersionSystem({ resources, enqueueSystem: this.enqueueSystem, @@ -428,6 +438,8 @@ export class RunEngine { bulkActionId, planType, realtimeStreamsVersion, + debounce, + onDebounced, }: TriggerParams, tx?: PrismaClientOrTransaction ): Promise { @@ -437,6 +449,77 @@ export class RunEngine { this.tracer, "trigger", async (span) => { + // Handle debounce before creating a new run + // Store claimId if we successfully claimed the debounce key + let debounceClaimId: string | undefined; + + if (debounce) { + const debounceResult = await this.debounceSystem.handleDebounce({ + environmentId: environment.id, + taskIdentifier, + debounce: + debounce.mode === "trailing" + ? { + ...debounce, + updateData: { + payload, + payloadType, + metadata, + metadataType, + tags, + maxAttempts, + maxDurationInSeconds, + machine, + }, + } + : debounce, + tx: prisma, + }); + + if (debounceResult.status === "existing") { + span.setAttribute("debounced", true); + span.setAttribute("existingRunId", debounceResult.run.id); + + // For triggerAndWait, block the parent run with the existing run's waitpoint + if (resumeParentOnCompletion && parentTaskRunId && debounceResult.waitpoint) { + // Call the onDebounced callback to create a span and get spanIdToComplete + let spanIdToComplete: string | undefined; + if (onDebounced) { + spanIdToComplete = await onDebounced({ + existingRun: debounceResult.run, + waitpoint: debounceResult.waitpoint, + debounceKey: debounce.key, + }); + } + + await this.waitpointSystem.blockRunWithWaitpoint({ + runId: parentTaskRunId, + waitpoints: debounceResult.waitpoint.id, + spanIdToComplete, + projectId: environment.project.id, + organizationId: environment.organization.id, + batch, + workerId, + runnerId, + tx: prisma, + }); + } + + return debounceResult.run; + } + + // If max_duration_exceeded, we continue to create a new run without debouncing + if (debounceResult.status === "max_duration_exceeded") { + span.setAttribute("debounceMaxDurationExceeded", true); + } + + // Store the claimId for later registration + if (debounceResult.status === "new" && debounceResult.claimId) { + debounceClaimId = debounceResult.claimId; + span.setAttribute("debounceClaimId", debounceClaimId); + } + } + const status = delayUntil ? "DELAYED" : "PENDING"; //create run @@ -508,11 +591,18 @@ export class RunEngine { bulkActionGroupIds: bulkActionId ? [bulkActionId] : undefined, planType, realtimeStreamsVersion, + debounce: debounce + ? { + key: debounce.key, + delay: debounce.delay, + createdAt: new Date(), + } + : undefined, executionSnapshots: { create: { engine: "V2", - executionStatus: "RUN_CREATED", - description: "Run was created", + executionStatus: delayUntil ? "DELAYED" : "RUN_CREATED", + description: delayUntil ? 
"Run is delayed" : "Run was created", runStatus: status, environmentId: environment.id, environmentType: environment.type, @@ -582,6 +672,27 @@ export class RunEngine { runId: taskRun.id, delayUntil: taskRun.delayUntil, }); + + // Register debounced run in Redis for future lookups + if (debounce) { + const registered = await this.debounceSystem.registerDebouncedRun({ + runId: taskRun.id, + environmentId: environment.id, + taskIdentifier, + debounceKey: debounce.key, + delayUntil: taskRun.delayUntil, + claimId: debounceClaimId, + }); + + if (!registered) { + // We lost the claim - this shouldn't normally happen, but log it + this.logger.warn("trigger: lost debounce claim after creating run", { + runId: taskRun.id, + debounceKey: debounce.key, + claimId: debounceClaimId, + }); + } + } } else { if (taskRun.ttl) { await this.ttlSystem.scheduleExpireRun({ runId: taskRun.id, ttl: taskRun.ttl }); @@ -1307,6 +1418,9 @@ export class RunEngine { // Close the batch queue and its Redis connections await this.batchQueue.close(); + + // Close the debounce system Redis connection + await this.debounceSystem.quit(); } catch (error) { // And should always throw } @@ -1780,6 +1894,9 @@ export class RunEngine { case "FINISHED": { throw new NotImplementedError("There shouldn't be a heartbeat for FINISHED"); } + case "DELAYED": { + throw new NotImplementedError("There shouldn't be a heartbeat for DELAYED"); + } default: { assertNever(latestSnapshot.executionStatus); } @@ -1820,7 +1937,8 @@ export class RunEngine { case "PENDING_CANCEL": case "PENDING_EXECUTING": case "QUEUED_EXECUTING": - case "RUN_CREATED": { + case "RUN_CREATED": + case "DELAYED": { // Do nothing; return; } diff --git a/internal-packages/run-engine/src/engine/statuses.ts b/internal-packages/run-engine/src/engine/statuses.ts index ce7e9c8129..8483225623 100644 --- a/internal-packages/run-engine/src/engine/statuses.ts +++ b/internal-packages/run-engine/src/engine/statuses.ts @@ -37,7 +37,7 @@ export function isFinishedOrPendingFinished(status: TaskRunExecutionStatus): boo } export function isInitialState(status: TaskRunExecutionStatus): boolean { - const startedStatuses: TaskRunExecutionStatus[] = ["RUN_CREATED"]; + const startedStatuses: TaskRunExecutionStatus[] = ["RUN_CREATED", "DELAYED"]; return startedStatuses.includes(status); } diff --git a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts new file mode 100644 index 0000000000..af25a31552 --- /dev/null +++ b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts @@ -0,0 +1,906 @@ +import { + createRedisClient, + Redis, + RedisOptions, + type Callback, + type Result, +} from "@internal/redis"; +import { startSpan } from "@internal/tracing"; +import { parseNaturalLanguageDuration } from "@trigger.dev/core/v3/isomorphic"; +import { PrismaClientOrTransaction, TaskRun, Waitpoint } from "@trigger.dev/database"; +import { nanoid } from "nanoid"; +import { SystemResources } from "./systems.js"; +import { ExecutionSnapshotSystem, getLatestExecutionSnapshot } from "./executionSnapshotSystem.js"; +import { DelayedRunSystem } from "./delayedRunSystem.js"; + +export type DebounceOptions = { + key: string; + delay: string; + mode?: "leading" | "trailing"; + /** When mode: "trailing", these fields will be used to update the existing run */ + updateData?: { + payload: string; + payloadType: string; + metadata?: string; + metadataType?: string; + tags?: { id: string; name: string }[]; + maxAttempts?: number; + 
maxDurationInSeconds?: number; + machine?: string; + }; +}; + +export type DebounceSystemOptions = { + resources: SystemResources; + redis: RedisOptions; + executionSnapshotSystem: ExecutionSnapshotSystem; + delayedRunSystem: DelayedRunSystem; + maxDebounceDurationMs: number; +}; + +export type DebounceResult = + | { + status: "new"; + claimId?: string; // Present when we claimed the key atomically + } + | { + status: "existing"; + run: TaskRun; + waitpoint: Waitpoint | null; + } + | { + status: "max_duration_exceeded"; + }; + +// TTL for the pending claim state (30 seconds) +const CLAIM_TTL_MS = 30_000; +// Max retries when waiting for another server to complete its claim +const MAX_CLAIM_RETRIES = 10; +// Delay between retries when waiting for pending claim +const CLAIM_RETRY_DELAY_MS = 50; + +export type DebounceData = { + key: string; + delay: string; + createdAt: Date; +}; + +/** + * DebounceSystem handles debouncing of task triggers. + * + * When a run is triggered with a debounce key, if an existing run with the same key + * is still in the DELAYED execution status, the new trigger "pushes" the existing + * run's execution time later rather than creating a new run. + * + * The debounce key mapping is stored in Redis for fast lookups (to avoid database indexes). + */ +export class DebounceSystem { + private readonly $: SystemResources; + private readonly redis: Redis; + private readonly executionSnapshotSystem: ExecutionSnapshotSystem; + private readonly delayedRunSystem: DelayedRunSystem; + private readonly maxDebounceDurationMs: number; + + constructor(options: DebounceSystemOptions) { + this.$ = options.resources; + this.redis = createRedisClient( + { + ...options.redis, + keyPrefix: `${options.redis.keyPrefix ?? ""}debounce:`, + }, + { + onError: (error) => { + this.$.logger.error("DebounceSystem redis client error:", { error }); + }, + } + ); + this.executionSnapshotSystem = options.executionSnapshotSystem; + this.delayedRunSystem = options.delayedRunSystem; + this.maxDebounceDurationMs = options.maxDebounceDurationMs; + + this.#registerCommands(); + } + + #registerCommands() { + // Atomically deletes a key only if its value starts with "pending:". + // Returns [1, nil] if deleted (was pending or didn't exist) + // Returns [0, value] if not deleted (has a run ID) + // This prevents the race condition where between checking "still pending?" + // and calling DEL, the original server could complete and register a valid run ID. + this.redis.defineCommand("conditionallyDeletePendingKey", { + numberOfKeys: 1, + lua: ` +local value = redis.call('GET', KEYS[1]) +if not value then + return { 1, nil } +end +if string.sub(value, 1, 8) == 'pending:' then + redis.call('DEL', KEYS[1]) + return { 1, nil } +end +return { 0, value } + `, + }); + + // Atomically sets runId only if current value equals expected pending claim. + // This prevents the TOCTOU race condition where between GET (check claim) and SET (register), + // another server could claim and register a different run, which would get overwritten. + // Returns 1 if set succeeded, 0 if claim mismatch (lost the claim). + this.redis.defineCommand("registerIfClaimOwned", { + numberOfKeys: 1, + lua: ` +local value = redis.call('GET', KEYS[1]) +if value == ARGV[1] then + redis.call('SET', KEYS[1], ARGV[2], 'PX', ARGV[3]) + return 1 +end +return 0 + `, + }); + } + + /** + * Gets the Redis key for a debounce lookup. 
+ * Key pattern: {envId}:{taskId}:{debounceKey} + */ + private getDebounceRedisKey(envId: string, taskId: string, debounceKey: string): string { + return `${envId}:${taskId}:${debounceKey}`; + } + + /** + * Atomically deletes a key only if its value still starts with "pending:". + * This prevents the race condition where between the final GET check and DEL, + * the original server could complete and register a valid run ID. + * + * @returns { deleted: true } if the key was deleted or didn't exist + * @returns { deleted: false, existingRunId: string } if the key has a valid run ID + */ + private async conditionallyDeletePendingKey( + redisKey: string + ): Promise<{ deleted: true } | { deleted: false; existingRunId: string }> { + const result = await this.redis.conditionallyDeletePendingKey(redisKey); + + if (!result) { + // Should not happen, but treat as deleted if no result + return { deleted: true }; + } + + const [wasDeleted, currentValue] = result; + + if (wasDeleted === 1) { + return { deleted: true }; + } + + // Key exists with a valid run ID + return { deleted: false, existingRunId: currentValue! }; + } + + /** + * Atomically claims a debounce key using SET NX. + * This prevents the race condition where two servers both check for an existing + * run, find none, and both create new runs. + * + * Returns: + * - { claimed: true } if we successfully claimed the key + * - { claimed: false, existingRunId: string } if key exists with a run ID + * - { claimed: false, existingRunId: null } if key exists but is pending (another server is creating) + */ + private async claimDebounceKey({ + environmentId, + taskIdentifier, + debounceKey, + claimId, + ttlMs, + }: { + environmentId: string; + taskIdentifier: string; + debounceKey: string; + claimId: string; + ttlMs: number; + }): Promise<{ claimed: true } | { claimed: false; existingRunId: string | null }> { + const redisKey = this.getDebounceRedisKey(environmentId, taskIdentifier, debounceKey); + + // Try to claim with SET NX (only succeeds if key doesn't exist) + const result = await this.redis.set(redisKey, `pending:${claimId}`, "PX", ttlMs, "NX"); + + if (result === "OK") { + this.$.logger.debug("claimDebounceKey: claimed key", { + redisKey, + claimId, + debounceKey, + }); + return { claimed: true }; + } + + // Claim failed - get existing value + const existingValue = await this.redis.get(redisKey); + + if (!existingValue) { + // Key expired between SET and GET - rare race, return null to trigger retry + this.$.logger.debug("claimDebounceKey: key expired between SET and GET", { + redisKey, + debounceKey, + }); + return { claimed: false, existingRunId: null }; + } + + if (existingValue.startsWith("pending:")) { + // Another server is creating the run - return null to trigger wait/retry + this.$.logger.debug("claimDebounceKey: key is pending (another server is creating)", { + redisKey, + debounceKey, + existingValue, + }); + return { claimed: false, existingRunId: null }; + } + + // It's a run ID + this.$.logger.debug("claimDebounceKey: found existing run", { + redisKey, + debounceKey, + existingRunId: existingValue, + }); + return { claimed: false, existingRunId: existingValue }; + } + + /** + * Atomically claims the debounce key before returning "new". + * This prevents the race condition where returning "new" without a claimId + * allows registerDebouncedRun to do a plain SET that can overwrite another server's registration. 
+ * + * This method is called when we've determined there's no valid existing run but need + * to safely claim the key before creating a new one. + */ + private async claimKeyForNewRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }: { + environmentId: string; + taskIdentifier: string; + debounce: DebounceOptions; + tx?: PrismaClientOrTransaction; + }): Promise { + const redisKey = this.getDebounceRedisKey(environmentId, taskIdentifier, debounce.key); + const claimId = nanoid(16); + + const claimResult = await this.claimDebounceKey({ + environmentId, + taskIdentifier, + debounceKey: debounce.key, + claimId, + ttlMs: CLAIM_TTL_MS, + }); + + if (claimResult.claimed) { + this.$.logger.debug("claimKeyForNewRun: claimed key, returning new", { + debounceKey: debounce.key, + taskIdentifier, + environmentId, + claimId, + }); + return { status: "new", claimId }; + } + + if (claimResult.existingRunId) { + // Another server registered a run while we were processing - handle it + this.$.logger.debug("claimKeyForNewRun: found existing run, handling it", { + debounceKey: debounce.key, + existingRunId: claimResult.existingRunId, + }); + return await this.handleExistingRun({ + existingRunId: claimResult.existingRunId, + redisKey, + environmentId, + taskIdentifier, + debounce, + tx, + }); + } + + // Another server is creating (pending state) - wait for it + this.$.logger.debug("claimKeyForNewRun: key is pending, waiting for existing run", { + debounceKey: debounce.key, + }); + return await this.waitForExistingRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); + } + + /** + * Waits for another server to complete its claim and register a run ID. + * Used when we detect a "pending" state, meaning another server has claimed + * the key but hasn't yet created the run. 
+ */ + private async waitForExistingRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }: { + environmentId: string; + taskIdentifier: string; + debounce: DebounceOptions; + tx?: PrismaClientOrTransaction; + }): Promise { + const redisKey = this.getDebounceRedisKey(environmentId, taskIdentifier, debounce.key); + + for (let i = 0; i < MAX_CLAIM_RETRIES; i++) { + await new Promise((resolve) => setTimeout(resolve, CLAIM_RETRY_DELAY_MS)); + + const value = await this.redis.get(redisKey); + + if (!value) { + // Key expired or was deleted - atomically claim before returning "new" + this.$.logger.debug("waitForExistingRun: key expired/deleted, claiming key", { + redisKey, + debounceKey: debounce.key, + attempt: i + 1, + }); + return await this.claimKeyForNewRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); + } + + if (!value.startsWith("pending:")) { + // It's a run ID now - proceed with reschedule logic + this.$.logger.debug("waitForExistingRun: found run ID, handling existing run", { + redisKey, + debounceKey: debounce.key, + existingRunId: value, + attempt: i + 1, + }); + return await this.handleExistingRun({ + existingRunId: value, + redisKey, + environmentId, + taskIdentifier, + debounce, + tx, + }); + } + + this.$.logger.debug("waitForExistingRun: still pending, retrying", { + redisKey, + debounceKey: debounce.key, + attempt: i + 1, + value, + }); + } + + // Timed out waiting - the other server may have failed + // Conditionally delete the key only if it's still pending + // This prevents the race where the original server completed between our last check and now + this.$.logger.warn( + "waitForExistingRun: timed out waiting for pending claim, attempting conditional delete", + { + redisKey, + debounceKey: debounce.key, + } + ); + + const deleteResult = await this.conditionallyDeletePendingKey(redisKey); + + if (deleteResult.deleted) { + // Key was pending (or didn't exist) - atomically claim before returning "new" + this.$.logger.debug("waitForExistingRun: stale pending key deleted, claiming key", { + redisKey, + debounceKey: debounce.key, + }); + return await this.claimKeyForNewRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); + } + + // Key now has a valid run ID - the original server completed! + // Handle the existing run instead of creating a duplicate + this.$.logger.debug( + "waitForExistingRun: original server completed during timeout, handling existing run", + { + redisKey, + debounceKey: debounce.key, + existingRunId: deleteResult.existingRunId, + } + ); + return await this.handleExistingRun({ + existingRunId: deleteResult.existingRunId, + redisKey, + environmentId, + taskIdentifier, + debounce, + tx, + }); + } + + /** + * Handles an existing debounced run by locking it and rescheduling. + * Extracted to be reusable by both handleDebounce and waitForExistingRun. + */ + private async handleExistingRun({ + existingRunId, + redisKey, + environmentId, + taskIdentifier, + debounce, + tx, + }: { + existingRunId: string; + redisKey: string; + environmentId: string; + taskIdentifier: string; + debounce: DebounceOptions; + tx?: PrismaClientOrTransaction; + }): Promise { + return await this.$.runLock.lock("handleDebounce", [existingRunId], async () => { + const prisma = tx ?? 
this.$.prisma; + + // Get the latest execution snapshot + let snapshot; + try { + snapshot = await getLatestExecutionSnapshot(prisma, existingRunId); + } catch (error) { + // Run no longer exists or has no snapshot + this.$.logger.debug("handleExistingRun: existing run not found or has no snapshot", { + existingRunId, + debounceKey: debounce.key, + error, + }); + // Clean up stale Redis key and atomically claim before returning "new" + await this.redis.del(redisKey); + return await this.claimKeyForNewRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); + } + + // Check if run is still in DELAYED status (or legacy RUN_CREATED for older runs) + if (snapshot.executionStatus !== "DELAYED" && snapshot.executionStatus !== "RUN_CREATED") { + this.$.logger.debug("handleExistingRun: existing run is no longer delayed", { + existingRunId, + executionStatus: snapshot.executionStatus, + debounceKey: debounce.key, + }); + // Clean up Redis key and atomically claim before returning "new" + await this.redis.del(redisKey); + return await this.claimKeyForNewRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); + } + + // Get the run to check debounce metadata and createdAt + const existingRun = await prisma.taskRun.findFirst({ + where: { id: existingRunId }, + include: { + associatedWaitpoint: true, + }, + }); + + if (!existingRun) { + this.$.logger.debug("handleExistingRun: existing run not found in database", { + existingRunId, + debounceKey: debounce.key, + }); + // Clean up stale Redis key and atomically claim before returning "new" + await this.redis.del(redisKey); + return await this.claimKeyForNewRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); + } + + // Calculate new delay - parseNaturalLanguageDuration returns a Date (now + duration) + const newDelayUntil = parseNaturalLanguageDuration(debounce.delay); + if (!newDelayUntil) { + this.$.logger.error("handleExistingRun: invalid delay duration", { + delay: debounce.delay, + }); + // Invalid delay but we still need to atomically claim before returning "new" + return await this.claimKeyForNewRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); + } + + // Check if max debounce duration would be exceeded + const runCreatedAt = existingRun.createdAt; + const maxDelayUntil = new Date(runCreatedAt.getTime() + this.maxDebounceDurationMs); + + if (newDelayUntil > maxDelayUntil) { + this.$.logger.debug("handleExistingRun: max debounce duration would be exceeded", { + existingRunId, + debounceKey: debounce.key, + runCreatedAt, + newDelayUntil, + maxDelayUntil, + maxDebounceDurationMs: this.maxDebounceDurationMs, + }); + // Clean up Redis key since this debounce window is closed + await this.redis.del(redisKey); + return { status: "max_duration_exceeded" }; + } + + // Only reschedule if the new delay would push the run later + // This ensures debounce always "pushes later", never earlier + const currentDelayUntil = existingRun.delayUntil; + const shouldReschedule = !currentDelayUntil || newDelayUntil > currentDelayUntil; + + if (shouldReschedule) { + // Reschedule the delayed run + await this.delayedRunSystem.rescheduleDelayedRun({ + runId: existingRunId, + delayUntil: newDelayUntil, + tx: prisma, + }); + + // Update Redis TTL + const ttlMs = Math.max( + newDelayUntil.getTime() - Date.now() + 60_000, // Add 1 minute buffer + 60_000 + ); + await this.redis.pexpire(redisKey, ttlMs); + + this.$.logger.debug("handleExistingRun: rescheduled existing debounced run", { + existingRunId, + debounceKey: debounce.key, + 
newDelayUntil, + }); + } else { + this.$.logger.debug( + "handleExistingRun: skipping reschedule, new delay is not later than current", + { + existingRunId, + debounceKey: debounce.key, + currentDelayUntil, + newDelayUntil, + } + ); + } + + // Update run data when mode is "trailing" + let updatedRun = existingRun; + if (debounce.mode === "trailing" && debounce.updateData) { + updatedRun = await this.#updateRunForTrailingMode({ + runId: existingRunId, + updateData: debounce.updateData, + tx: prisma, + }); + + this.$.logger.debug("handleExistingRun: updated run data for trailing mode", { + existingRunId, + debounceKey: debounce.key, + }); + } + + return { + status: "existing", + run: updatedRun, + waitpoint: existingRun.associatedWaitpoint, + }; + }); + } + + /** + * Called during trigger to check for an existing debounced run. + * If found and still in DELAYED status, reschedules it and returns the existing run. + * + * Uses atomic SET NX to prevent the distributed race condition where two servers + * both check for an existing run, find none, and both create new runs. + * + * Note: This method does NOT handle blocking parent runs for triggerAndWait. + * The caller (RunEngine.trigger) is responsible for blocking using waitpointSystem.blockRunWithWaitpoint(). + */ + async handleDebounce({ + environmentId, + taskIdentifier, + debounce, + tx, + }: { + environmentId: string; + taskIdentifier: string; + debounce: DebounceOptions; + tx?: PrismaClientOrTransaction; + }): Promise { + return startSpan( + this.$.tracer, + "handleDebounce", + async (span) => { + span.setAttribute("debounceKey", debounce.key); + span.setAttribute("taskIdentifier", taskIdentifier); + span.setAttribute("environmentId", environmentId); + + const redisKey = this.getDebounceRedisKey(environmentId, taskIdentifier, debounce.key); + const claimId = nanoid(16); // Unique ID for this claim attempt + + // Try to atomically claim the debounce key + const claimResult = await this.claimDebounceKey({ + environmentId, + taskIdentifier, + debounceKey: debounce.key, + claimId, + ttlMs: CLAIM_TTL_MS, + }); + + if (claimResult.claimed) { + // We successfully claimed the key - return "new" to create the run + // Caller will call registerDebouncedRun after creating the run + this.$.logger.debug("handleDebounce: claimed key, returning new", { + debounceKey: debounce.key, + taskIdentifier, + environmentId, + claimId, + }); + span.setAttribute("claimed", true); + span.setAttribute("claimId", claimId); + return { status: "new", claimId }; + } + + if (!claimResult.existingRunId) { + // Another server is creating - wait and retry to get the run ID + this.$.logger.debug("handleDebounce: key is pending, waiting for existing run", { + debounceKey: debounce.key, + taskIdentifier, + environmentId, + }); + span.setAttribute("waitingForPending", true); + return await this.waitForExistingRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); + } + + // Found existing run - lock and reschedule + span.setAttribute("existingRunId", claimResult.existingRunId); + return await this.handleExistingRun({ + existingRunId: claimResult.existingRunId, + redisKey, + environmentId, + taskIdentifier, + debounce, + tx, + }); + }, + { + attributes: { + environmentId, + taskIdentifier, + debounceKey: debounce.key, + }, + } + ); + } + + /** + * Stores the debounce key -> runId mapping after creating a new debounced run. + * + * If claimId is provided, verifies we still own the pending claim before registering. 
+ * This prevents a race where our claim expired and another server took over. + * + * @returns true if registration succeeded, false if we lost the claim + */ + async registerDebouncedRun({ + runId, + environmentId, + taskIdentifier, + debounceKey, + delayUntil, + claimId, + }: { + runId: string; + environmentId: string; + taskIdentifier: string; + debounceKey: string; + delayUntil: Date; + claimId?: string; + }): Promise { + return startSpan( + this.$.tracer, + "registerDebouncedRun", + async (span) => { + const redisKey = this.getDebounceRedisKey(environmentId, taskIdentifier, debounceKey); + + // Calculate TTL: delay until + buffer + const ttlMs = Math.max( + delayUntil.getTime() - Date.now() + 60_000, // Add 1 minute buffer + 60_000 + ); + + if (claimId) { + // Use atomic Lua script to verify claim and set runId in one operation. + // This prevents the TOCTOU race where another server could claim and register + // between our GET check and SET. + const result = await this.redis.registerIfClaimOwned( + redisKey, + `pending:${claimId}`, + runId, + ttlMs.toString() + ); + + if (result === 0) { + // We lost the claim - another server took over or it expired + this.$.logger.warn("registerDebouncedRun: lost claim, not registering", { + runId, + environmentId, + taskIdentifier, + debounceKey, + claimId, + }); + span.setAttribute("claimLost", true); + return false; + } + } else { + // No claim to verify, just set directly + await this.redis.set(redisKey, runId, "PX", ttlMs); + } + + this.$.logger.debug("registerDebouncedRun: stored debounce key mapping", { + runId, + environmentId, + taskIdentifier, + debounceKey, + delayUntil, + ttlMs, + claimId, + }); + + span.setAttribute("registered", true); + return true; + }, + { + attributes: { + runId, + environmentId, + taskIdentifier, + debounceKey, + claimId: claimId ?? "none", + }, + } + ); + } + + /** + * Clears the debounce key when a run is enqueued or completed. + */ + async clearDebounceKey({ + environmentId, + taskIdentifier, + debounceKey, + }: { + environmentId: string; + taskIdentifier: string; + debounceKey: string; + }): Promise { + const redisKey = this.getDebounceRedisKey(environmentId, taskIdentifier, debounceKey); + await this.redis.del(redisKey); + + this.$.logger.debug("clearDebounceKey: cleared debounce key mapping", { + environmentId, + taskIdentifier, + debounceKey, + }); + } + + /** + * Updates a run's data for trailing mode debounce. + * Updates: payload, metadata, tags, maxAttempts, maxDurationInSeconds, machinePreset + */ + async #updateRunForTrailingMode({ + runId, + updateData, + tx, + }: { + runId: string; + updateData: NonNullable; + tx?: PrismaClientOrTransaction; + }): Promise { + const prisma = tx ?? this.$.prisma; + + // Build the update object + const updatePayload: { + payload: string; + payloadType: string; + metadata?: string; + metadataType?: string; + maxAttempts?: number; + maxDurationInSeconds?: number; + machinePreset?: string; + runTags?: string[]; + tags?: { + set: { id: string }[]; + }; + } = { + payload: updateData.payload, + payloadType: updateData.payloadType, + }; + + if (updateData.metadata !== undefined) { + updatePayload.metadata = updateData.metadata; + updatePayload.metadataType = updateData.metadataType ?? 
"application/json"; + } + + if (updateData.maxAttempts !== undefined) { + updatePayload.maxAttempts = updateData.maxAttempts; + } + + if (updateData.maxDurationInSeconds !== undefined) { + updatePayload.maxDurationInSeconds = updateData.maxDurationInSeconds; + } + + if (updateData.machine !== undefined) { + updatePayload.machinePreset = updateData.machine; + } + + // Handle tags update - replace existing tags + if (updateData.tags !== undefined) { + updatePayload.runTags = updateData.tags.map((t) => t.name); + updatePayload.tags = { + set: updateData.tags.map((t) => ({ id: t.id })), + }; + } + + const updatedRun = await prisma.taskRun.update({ + where: { id: runId }, + data: updatePayload, + include: { + associatedWaitpoint: true, + }, + }); + + return updatedRun; + } + + async quit(): Promise { + await this.redis.quit(); + } +} + +declare module "@internal/redis" { + interface RedisCommander { + /** + * Atomically deletes a key only if its value starts with "pending:". + * @returns [1, nil] if deleted (was pending or didn't exist) + * @returns [0, value] if not deleted (has a run ID) + */ + conditionallyDeletePendingKey( + key: string, + callback?: Callback<[number, string | null]> + ): Result<[number, string | null], Context>; + + /** + * Atomically sets runId only if current value equals expected pending claim. + * Prevents TOCTOU race condition between claim verification and registration. + * @param key - The Redis key + * @param expectedClaim - Expected value "pending:{claimId}" + * @param runId - The new value (run ID) to set + * @param ttlMs - TTL in milliseconds + * @returns 1 if set succeeded, 0 if claim mismatch + */ + registerIfClaimOwned( + key: string, + expectedClaim: string, + runId: string, + ttlMs: string, + callback?: Callback + ): Result; + } +} diff --git a/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts b/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts index e3dca4b544..740ce1a849 100644 --- a/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts @@ -40,8 +40,11 @@ export class DelayedRunSystem { return await this.$.runLock.lock("rescheduleDelayedRun", [runId], async () => { const snapshot = await getLatestExecutionSnapshot(prisma, runId); - //if the run isn't just created then we can't reschedule it - if (snapshot.executionStatus !== "RUN_CREATED") { + // Check if the run is still in DELAYED status (or legacy RUN_CREATED for older runs) + if ( + snapshot.executionStatus !== "DELAYED" && + snapshot.executionStatus !== "RUN_CREATED" + ) { throw new ServiceValidationError("Cannot reschedule a run that is not delayed"); } @@ -54,9 +57,9 @@ export class DelayedRunSystem { executionSnapshots: { create: { engine: "V2", - executionStatus: "RUN_CREATED", + executionStatus: "DELAYED", description: "Delayed run was rescheduled to a future date", - runStatus: "EXPIRED", + runStatus: "DELAYED", environmentId: snapshot.environmentId, environmentType: snapshot.environmentType, projectId: snapshot.projectId, @@ -98,71 +101,101 @@ export class DelayedRunSystem { } async enqueueDelayedRun({ runId }: { runId: string }) { - const run = await this.$.prisma.taskRun.findFirst({ - where: { id: runId }, - include: { - runtimeEnvironment: { - include: { - project: true, - organization: true, + // Use lock to prevent race with debounce rescheduling + return await this.$.runLock.lock("enqueueDelayedRun", [runId], async () => { + // Check if run is still in DELAYED 
status before enqueuing + // This prevents a race where debounce reschedules the run while we're about to enqueue it + const snapshot = await getLatestExecutionSnapshot(this.$.prisma, runId); + + if (snapshot.executionStatus !== "DELAYED" && snapshot.executionStatus !== "RUN_CREATED") { + this.$.logger.debug("enqueueDelayedRun: run is no longer delayed, skipping enqueue", { + runId, + executionStatus: snapshot.executionStatus, + }); + return; + } + + const run = await this.$.prisma.taskRun.findFirst({ + where: { id: runId }, + include: { + runtimeEnvironment: { + include: { + project: true, + organization: true, + }, }, }, - }, - }); - - if (!run) { - throw new Error(`#enqueueDelayedRun: run not found: ${runId}`); - } + }); - // Now we need to enqueue the run into the RunQueue - await this.enqueueSystem.enqueueRun({ - run, - env: run.runtimeEnvironment, - batchId: run.batchId ?? undefined, - }); - - const queuedAt = new Date(); + if (!run) { + throw new Error(`#enqueueDelayedRun: run not found: ${runId}`); + } - const updatedRun = await this.$.prisma.taskRun.update({ - where: { id: runId }, - data: { - status: "PENDING", - queuedAt, - }, - }); + // Check if delayUntil has been rescheduled to the future (e.g., by debounce) + // If so, don't enqueue - the rescheduled worker job will handle it + if (run.delayUntil && run.delayUntil > new Date()) { + this.$.logger.debug( + "enqueueDelayedRun: delay was rescheduled to the future, skipping enqueue", + { + runId, + delayUntil: run.delayUntil, + } + ); + return; + } - this.$.eventBus.emit("runEnqueuedAfterDelay", { - time: new Date(), - run: { - id: runId, - status: "PENDING", - queuedAt, - updatedAt: updatedRun.updatedAt, - createdAt: updatedRun.createdAt, - }, - organization: { - id: run.runtimeEnvironment.organizationId, - }, - project: { - id: run.runtimeEnvironment.projectId, - }, - environment: { - id: run.runtimeEnvironmentId, - }, - }); + // Now we need to enqueue the run into the RunQueue + // Skip the lock in enqueueRun since we already hold it + await this.enqueueSystem.enqueueRun({ + run, + env: run.runtimeEnvironment, + batchId: run.batchId ?? 
undefined, + skipRunLock: true, + }); + + const queuedAt = new Date(); + + const updatedRun = await this.$.prisma.taskRun.update({ + where: { id: runId }, + data: { + status: "PENDING", + queuedAt, + }, + }); + + this.$.eventBus.emit("runEnqueuedAfterDelay", { + time: new Date(), + run: { + id: runId, + status: "PENDING", + queuedAt, + updatedAt: updatedRun.updatedAt, + createdAt: updatedRun.createdAt, + }, + organization: { + id: run.runtimeEnvironment.organizationId, + }, + project: { + id: run.runtimeEnvironment.projectId, + }, + environment: { + id: run.runtimeEnvironmentId, + }, + }); - if (run.ttl) { - const expireAt = parseNaturalLanguageDuration(run.ttl); + if (run.ttl) { + const expireAt = parseNaturalLanguageDuration(run.ttl); - if (expireAt) { - await this.$.worker.enqueue({ - id: `expireRun:${runId}`, - job: "expireRun", - payload: { runId }, - availableAt: expireAt, - }); + if (expireAt) { + await this.$.worker.enqueue({ + id: `expireRun:${runId}`, + job: "expireRun", + payload: { runId }, + availableAt: expireAt, + }); + } } - } + }); } async scheduleDelayedRunEnqueuing({ runId, delayUntil }: { runId: string; delayUntil: Date }) { diff --git a/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts b/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts index 181a6fe277..40a92abb55 100644 --- a/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts @@ -576,6 +576,18 @@ export class WaitpointSystem { reason: "run is already executing", }; } + case "DELAYED": { + this.$.logger.debug(`continueRunIfUnblocked: run is delayed, skipping`, { + runId, + snapshot, + executionStatus: snapshot.executionStatus, + }); + + return { + status: "skipped", + reason: "run is delayed", + }; + } case "QUEUED": { this.$.logger.info(`continueRunIfUnblocked: run is queued, skipping`, { runId, diff --git a/internal-packages/run-engine/src/engine/tests/debounce.test.ts b/internal-packages/run-engine/src/engine/tests/debounce.test.ts new file mode 100644 index 0000000000..0c3d09d887 --- /dev/null +++ b/internal-packages/run-engine/src/engine/tests/debounce.test.ts @@ -0,0 +1,2174 @@ +import { containerTest, assertNonNullable } from "@internal/testcontainers"; +import { trace } from "@internal/tracing"; +import { expect } from "vitest"; +import { RunEngine } from "../index.js"; +import { setTimeout } from "timers/promises"; +import { setupAuthenticatedEnvironment, setupBackgroundWorker } from "./setup.js"; + +vi.setConfig({ testTimeout: 60_000 }); + +describe("RunEngine debounce", () => { + containerTest("Basic debounce: first trigger creates run", async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, // 1 minute max debounce + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // Trigger with debounce + const run = await 
engine.trigger( + { + number: 1, + friendlyId: "run_deb1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-123", + delay: "5s", + }, + }, + prisma + ); + + expect(run.friendlyId).toBe("run_deb1"); + expect(run.status).toBe("DELAYED"); + + // Verify debounce is stored in the run + const dbRun = await prisma.taskRun.findFirst({ + where: { id: run.id }, + }); + assertNonNullable(dbRun); + const debounce = dbRun.debounce as { key: string; delay: string } | null; + expect(debounce?.key).toBe("user-123"); + expect(debounce?.delay).toBe("5s"); + + // Verify execution status is DELAYED + const executionData = await engine.getRunExecutionData({ runId: run.id }); + assertNonNullable(executionData); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); + } finally { + await engine.quit(); + } + }); + + containerTest( + "Debounce: multiple triggers return same run", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger creates run + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_deb1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-123", + delay: "5s", + }, + }, + prisma + ); + + // Second trigger should return same run + const run2 = await engine.trigger( + { + number: 2, + friendlyId: "run_deb2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-123", + delay: "5s", + }, + }, + prisma + ); + + // Both should return the same run (first run wins) + expect(run2.id).toBe(run1.id); + expect(run2.friendlyId).toBe(run1.friendlyId); + + // Only one run should exist in DB + const runs = await prisma.taskRun.findMany({ + where: { + taskIdentifier, + runtimeEnvironmentId: authenticatedEnvironment.id, + }, + }); + expect(runs.length).toBe(1); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce: delay extension on subsequent triggers", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await 
setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + const initialDelay = 1000; + const initialDelayUntil = new Date(Date.now() + initialDelay); + + // First trigger + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_deb1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: initialDelayUntil, + debounce: { + key: "user-123", + delay: "1s", + }, + }, + prisma + ); + + const originalDelayUntil = run1.delayUntil; + assertNonNullable(originalDelayUntil); + + // Wait a bit then trigger again + await setTimeout(300); + + // Second trigger should extend the delay + const run2 = await engine.trigger( + { + number: 2, + friendlyId: "run_deb2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 1000), + debounce: { + key: "user-123", + delay: "1s", + }, + }, + prisma + ); + + // Same run returned + expect(run2.id).toBe(run1.id); + + // delayUntil should have been extended + const updatedRun = await prisma.taskRun.findFirst({ + where: { id: run1.id }, + }); + assertNonNullable(updatedRun); + assertNonNullable(updatedRun.delayUntil); + + // The new delayUntil should be later than the original + expect(updatedRun.delayUntil.getTime()).toBeGreaterThan(originalDelayUntil.getTime()); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce: different keys create separate runs", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // Trigger with key "user-123" + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_deb1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: 
{}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-123", + delay: "5s", + }, + }, + prisma + ); + + // Trigger with different key "user-456" + const run2 = await engine.trigger( + { + number: 2, + friendlyId: "run_deb2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-456", + delay: "5s", + }, + }, + prisma + ); + + // Different keys should create different runs + expect(run2.id).not.toBe(run1.id); + + const runs = await prisma.taskRun.findMany({ + where: { + taskIdentifier, + runtimeEnvironmentId: authenticatedEnvironment.id, + }, + }); + expect(runs.length).toBe(2); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce: run executes after final delay", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger with 1s delay + const run = await engine.trigger( + { + number: 1, + friendlyId: "run_deb1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 1000), + debounce: { + key: "user-123", + delay: "1s", + }, + }, + prisma + ); + + // Verify it's in DELAYED status + let executionData = await engine.getRunExecutionData({ runId: run.id }); + assertNonNullable(executionData); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); + + // Wait for delay to pass + await setTimeout(1500); + + // Should now be QUEUED + executionData = await engine.getRunExecutionData({ runId: run.id }); + assertNonNullable(executionData); + expect(executionData.snapshot.executionStatus).toBe("QUEUED"); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce: no longer works after run is enqueued", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + 
baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger with short delay + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_deb1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 300), + debounce: { + key: "user-123", + delay: "300ms", + }, + }, + prisma + ); + + // Wait for run to be enqueued + await setTimeout(800); + + // Verify first run is now QUEUED + const executionData = await engine.getRunExecutionData({ runId: run1.id }); + assertNonNullable(executionData); + expect(executionData.snapshot.executionStatus).toBe("QUEUED"); + + // New trigger with same key should create a NEW run + const run2 = await engine.trigger( + { + number: 2, + friendlyId: "run_deb2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-123", + delay: "5s", + }, + }, + prisma + ); + + // Should be a different run + expect(run2.id).not.toBe(run1.id); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce: max duration exceeded creates new run", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + // Set a very short max debounce duration + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 500, // Very short max duration + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_deb1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 2000), + debounce: { + key: "user-123", + delay: "2s", + }, + }, + prisma + ); + + // Wait for max duration to be exceeded + await setTimeout(700); + + // Second trigger should create a new run because max duration exceeded + const run2 = await engine.trigger( + { + number: 2, + friendlyId: "run_deb2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: 
"s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 2000), + debounce: { + key: "user-123", + delay: "2s", + }, + }, + prisma + ); + + // Should be a different run because max duration exceeded + expect(run2.id).not.toBe(run1.id); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce keys are scoped to task identifier", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier1 = "test-task-1"; + const taskIdentifier2 = "test-task-2"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier1); + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier2); + + // Trigger task 1 with debounce key + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_task1", + environment: authenticatedEnvironment, + taskIdentifier: taskIdentifier1, + payload: '{"data": "task1"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: `task/${taskIdentifier1}`, + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "shared-key", + delay: "5s", + }, + }, + prisma + ); + + // Trigger task 2 with same debounce key - should create separate run + const run2 = await engine.trigger( + { + number: 2, + friendlyId: "run_task2", + environment: authenticatedEnvironment, + taskIdentifier: taskIdentifier2, + payload: '{"data": "task2"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: `task/${taskIdentifier2}`, + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "shared-key", + delay: "5s", + }, + }, + prisma + ); + + // Should be different runs (debounce scoped to task) + expect(run2.id).not.toBe(run1.id); + expect(run1.taskIdentifier).toBe(taskIdentifier1); + expect(run2.taskIdentifier).toBe(taskIdentifier2); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce with triggerAndWait: parent blocked by debounced child run", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const parentTask = "parent-task"; + const childTask = "child-task"; + + await 
setupBackgroundWorker(engine, authenticatedEnvironment, [parentTask, childTask]); + + // Trigger parent run + const parentRun = await engine.trigger( + { + number: 1, + friendlyId: "run_parent1", + environment: authenticatedEnvironment, + taskIdentifier: parentTask, + payload: "{}", + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + queue: `task/${parentTask}`, + isTest: false, + tags: [], + workerQueue: "main", + }, + prisma + ); + + // Dequeue parent and create the attempt + await setTimeout(500); + const dequeued = await engine.dequeueFromWorkerQueue({ + consumerId: "test_12345", + workerQueue: "main", + }); + await engine.startRunAttempt({ + runId: parentRun.id, + snapshotId: dequeued[0].snapshot.id, + }); + + // First triggerAndWait with debounce - creates child run + const childRun1 = await engine.trigger( + { + number: 1, + friendlyId: "run_child1", + environment: authenticatedEnvironment, + taskIdentifier: childTask, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + queue: `task/${childTask}`, + isTest: false, + tags: [], + workerQueue: "main", + resumeParentOnCompletion: true, + parentTaskRunId: parentRun.id, + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-123", + delay: "5s", + }, + }, + prisma + ); + + // Verify parent is blocked + const parentExecData1 = await engine.getRunExecutionData({ runId: parentRun.id }); + assertNonNullable(parentExecData1); + expect(parentExecData1.snapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); + + // Verify child run is in DELAYED status + const childExecData1 = await engine.getRunExecutionData({ runId: childRun1.id }); + assertNonNullable(childExecData1); + expect(childExecData1.snapshot.executionStatus).toBe("DELAYED"); + + // Check that parent is blocked by the child's waitpoint + const runWaitpoint = await prisma.taskRunWaitpoint.findFirst({ + where: { + taskRunId: parentRun.id, + }, + include: { + waitpoint: true, + }, + }); + assertNonNullable(runWaitpoint); + expect(runWaitpoint.waitpoint.type).toBe("RUN"); + expect(runWaitpoint.waitpoint.completedByTaskRunId).toBe(childRun1.id); + + // Second triggerAndWait with same debounce key should return same child run + const childRun2 = await engine.trigger( + { + number: 2, + friendlyId: "run_child2", + environment: authenticatedEnvironment, + taskIdentifier: childTask, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12347", + spanId: "s12347", + queue: `task/${childTask}`, + isTest: false, + tags: [], + workerQueue: "main", + resumeParentOnCompletion: true, + parentTaskRunId: parentRun.id, + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-123", + delay: "5s", + }, + }, + prisma + ); + + // Should return the same child run (debounced) + expect(childRun2.id).toBe(childRun1.id); + + // Only one child run should exist + const childRuns = await prisma.taskRun.findMany({ + where: { + taskIdentifier: childTask, + runtimeEnvironmentId: authenticatedEnvironment.id, + }, + }); + expect(childRuns.length).toBe(1); + + // Parent should still be blocked by the same child run's waitpoint + const parentExecData2 = await engine.getRunExecutionData({ runId: parentRun.id }); + assertNonNullable(parentExecData2); + expect(parentExecData2.snapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); + } finally { + await engine.quit(); + } + } + 
); + + containerTest( + "Debounce with triggerAndWait: second parent also blocked by same child", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const parentTask = "parent-task"; + const childTask = "child-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, [parentTask, childTask]); + + // Trigger first parent run + const parentRun1 = await engine.trigger( + { + number: 1, + friendlyId: "run_parent1", + environment: authenticatedEnvironment, + taskIdentifier: parentTask, + payload: "{}", + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + queue: `task/${parentTask}`, + isTest: false, + tags: [], + workerQueue: "main", + }, + prisma + ); + + // Dequeue first parent and start attempt + await setTimeout(500); + const dequeued1 = await engine.dequeueFromWorkerQueue({ + consumerId: "test_12345", + workerQueue: "main", + }); + await engine.startRunAttempt({ + runId: parentRun1.id, + snapshotId: dequeued1[0].snapshot.id, + }); + + // First parent triggers child with debounce + const childRun1 = await engine.trigger( + { + number: 1, + friendlyId: "run_child1", + environment: authenticatedEnvironment, + taskIdentifier: childTask, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + queue: `task/${childTask}`, + isTest: false, + tags: [], + workerQueue: "main", + resumeParentOnCompletion: true, + parentTaskRunId: parentRun1.id, + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "shared-key", + delay: "5s", + }, + }, + prisma + ); + + // Verify first parent is blocked + const parent1ExecData = await engine.getRunExecutionData({ runId: parentRun1.id }); + assertNonNullable(parent1ExecData); + expect(parent1ExecData.snapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); + + // Trigger second parent run + const parentRun2 = await engine.trigger( + { + number: 2, + friendlyId: "run_parent2", + environment: authenticatedEnvironment, + taskIdentifier: parentTask, + payload: "{}", + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12347", + spanId: "s12347", + queue: `task/${parentTask}`, + isTest: false, + tags: [], + workerQueue: "main", + }, + prisma + ); + + // Dequeue second parent and start attempt + await setTimeout(500); + const dequeued2 = await engine.dequeueFromWorkerQueue({ + consumerId: "test_12346", + workerQueue: "main", + }); + await engine.startRunAttempt({ + runId: parentRun2.id, + snapshotId: dequeued2[0].snapshot.id, + }); + + // Second parent triggers same child with debounce - should return existing child + const childRun2 = await engine.trigger( + { + number: 2, + friendlyId: "run_child2", + environment: authenticatedEnvironment, + taskIdentifier: childTask, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: 
{}, + traceId: "t12348", + spanId: "s12348", + queue: `task/${childTask}`, + isTest: false, + tags: [], + workerQueue: "main", + resumeParentOnCompletion: true, + parentTaskRunId: parentRun2.id, + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "shared-key", + delay: "5s", + }, + }, + prisma + ); + + // Should return the same child run + expect(childRun2.id).toBe(childRun1.id); + + // Second parent should also be blocked by the same child run + const parent2ExecData = await engine.getRunExecutionData({ runId: parentRun2.id }); + assertNonNullable(parent2ExecData); + expect(parent2ExecData.snapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); + + // Both parents should have waitpoints pointing to the same child + const waitpoints = await prisma.taskRunWaitpoint.findMany({ + where: { + taskRunId: { in: [parentRun1.id, parentRun2.id] }, + }, + include: { + waitpoint: true, + }, + }); + expect(waitpoints.length).toBe(2); + expect(waitpoints[0].waitpoint.completedByTaskRunId).toBe(childRun1.id); + expect(waitpoints[1].waitpoint.completedByTaskRunId).toBe(childRun1.id); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce: keys scoped to environment", + async ({ prisma, redisOptions }) => { + // Create production environment (also creates org and project) + const prodEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + // Create a second environment (development) within the same org/project + const devEnvironment = await prisma.runtimeEnvironment.create({ + data: { + type: "DEVELOPMENT", + slug: "dev-slug", + projectId: prodEnvironment.projectId, + organizationId: prodEnvironment.organizationId, + apiKey: "dev_api_key", + pkApiKey: "dev_pk_api_key", + shortcode: "dev_short", + maximumConcurrencyLimit: 10, + }, + include: { + project: true, + organization: true, + orgMember: true, + }, + }); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, prodEnvironment, taskIdentifier); + await setupBackgroundWorker(engine, devEnvironment, taskIdentifier); + + // Trigger in production environment + const runProd = await engine.trigger( + { + number: 1, + friendlyId: "run_prod1", + environment: prodEnvironment, + taskIdentifier, + payload: '{"env": "prod"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: `task/${taskIdentifier}`, + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "same-key", + delay: "5s", + }, + }, + prisma + ); + + // Trigger in development environment with same key - should create separate run + const runDev = await engine.trigger( + { + number: 2, + friendlyId: "run_dev1", + environment: devEnvironment, + taskIdentifier, + payload: '{"env": "dev"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: `task/${taskIdentifier}`, + isTest: false, + 
tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "same-key", + delay: "5s", + }, + }, + prisma + ); + + // Should be different runs (debounce scoped to environment) + expect(runDev.id).not.toBe(runProd.id); + expect(runProd.runtimeEnvironmentId).toBe(prodEnvironment.id); + expect(runDev.runtimeEnvironmentId).toBe(devEnvironment.id); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce: concurrent triggers only create one run (distributed race protection)", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // Trigger multiple runs concurrently with the same debounce key + // This simulates the distributed race condition where multiple servers + // try to create runs at the exact same time + const concurrentTriggers = Promise.all([ + engine.trigger( + { + number: 1, + friendlyId: "run_conc1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "concurrent-key", + delay: "5s", + }, + }, + prisma + ), + engine.trigger( + { + number: 2, + friendlyId: "run_conc2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "concurrent-key", + delay: "5s", + }, + }, + prisma + ), + engine.trigger( + { + number: 3, + friendlyId: "run_conc3", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "third"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12347", + spanId: "s12347", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "concurrent-key", + delay: "5s", + }, + }, + prisma + ), + ]); + + const [run1, run2, run3] = await concurrentTriggers; + + // All should return the same run (one won the claim, others waited and got it) + expect(run2.id).toBe(run1.id); + expect(run3.id).toBe(run1.id); + + // Only one run should exist in DB + const runs = await prisma.taskRun.findMany({ + where: { + taskIdentifier, + runtimeEnvironmentId: authenticatedEnvironment.id, + }, + }); + expect(runs.length).toBe(1); + + // The run should be in DELAYED status + const executionData = await engine.getRunExecutionData({ runId: run1.id }); + assertNonNullable(executionData); + 
expect(executionData.snapshot.executionStatus).toBe("DELAYED"); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce trailing mode: updates payload on subsequent triggers", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger creates run with trailing mode + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_trailing1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "trailing-key", + delay: "5s", + mode: "trailing", + }, + }, + prisma + ); + + expect(run1.friendlyId).toBe("run_trailing1"); + expect(run1.payload).toBe('{"data": "first"}'); + + // Second trigger with trailing mode should update the payload + const run2 = await engine.trigger( + { + number: 2, + friendlyId: "run_trailing2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "trailing-key", + delay: "5s", + mode: "trailing", + }, + }, + prisma + ); + + // Should return the same run + expect(run2.id).toBe(run1.id); + + // Verify the payload was updated to the second trigger's payload + const dbRun = await prisma.taskRun.findFirst({ + where: { id: run1.id }, + }); + assertNonNullable(dbRun); + expect(dbRun.payload).toBe('{"data": "second"}'); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce trailing mode: updates metadata on subsequent triggers", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger with metadata + const run1 = await engine.trigger( + { + number: 1, + friendlyId: 
"run_trailingmeta1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + metadata: '{"version": 1}', + metadataType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "trailing-meta-key", + delay: "5s", + mode: "trailing", + }, + }, + prisma + ); + + // Second trigger with different metadata + await engine.trigger( + { + number: 2, + friendlyId: "run_trailingmeta2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + metadata: '{"version": 2, "extra": "field"}', + metadataType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "trailing-meta-key", + delay: "5s", + mode: "trailing", + }, + }, + prisma + ); + + // Verify metadata was updated + const dbRun = await prisma.taskRun.findFirst({ + where: { id: run1.id }, + }); + assertNonNullable(dbRun); + expect(dbRun.metadata).toBe('{"version": 2, "extra": "field"}'); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce trailing mode: updates maxAttempts and maxDuration", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger with maxAttempts and maxDuration + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_trailingopts1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + maxAttempts: 3, + maxDurationInSeconds: 60, + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "trailing-opts-key", + delay: "5s", + mode: "trailing", + }, + }, + prisma + ); + + // Verify initial values + let dbRun = await prisma.taskRun.findFirst({ + where: { id: run1.id }, + }); + assertNonNullable(dbRun); + expect(dbRun.maxAttempts).toBe(3); + expect(dbRun.maxDurationInSeconds).toBe(60); + + // Second trigger with different maxAttempts and maxDuration + await engine.trigger( + { + number: 2, + friendlyId: "run_trailingopts2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + maxAttempts: 5, + maxDurationInSeconds: 120, + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + 
isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "trailing-opts-key", + delay: "5s", + mode: "trailing", + }, + }, + prisma + ); + + // Verify values were updated + dbRun = await prisma.taskRun.findFirst({ + where: { id: run1.id }, + }); + assertNonNullable(dbRun); + expect(dbRun.maxAttempts).toBe(5); + expect(dbRun.maxDurationInSeconds).toBe(120); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce leading mode (default): does NOT update payload", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger creates run (leading mode - default) + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_leading1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "leading-key", + delay: "5s", + // mode: "leading" is default, not specifying it + }, + }, + prisma + ); + + // Second trigger should NOT update the payload (leading mode) + await engine.trigger( + { + number: 2, + friendlyId: "run_leading2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "leading-key", + delay: "5s", + }, + }, + prisma + ); + + // Verify the payload is still the first trigger's payload + const dbRun = await prisma.taskRun.findFirst({ + where: { id: run1.id }, + }); + assertNonNullable(dbRun); + expect(dbRun.payload).toBe('{"data": "first"}'); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "registerDebouncedRun: atomic claim prevents overwrite when claim is lost", + async ({ prisma, redisOptions }) => { + // This test verifies the fix for the TOCTOU race condition in registerDebouncedRun. + // The race occurs when: + // 1. Server A claims debounce key with claimId-A + // 2. Server B claims same key with claimId-B (after A's claim expires) + // 3. Server B registers runId-B successfully + // 4. Server A attempts to register runId-A with stale claimId-A + // Without the fix, step 4 would overwrite runId-B. With the fix, it fails atomically. 
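+      // For illustration, the atomic "verify claim, then register" step that registerIfClaimOwned
+      // performs can be expressed as a small Lua script. This is only a sketch under assumptions
+      // (the engine's actual script and its ioredis defineCommand wiring may differ):
+      const registerIfClaimOwnedLuaSketch = `
+        if redis.call("GET", KEYS[1]) == ARGV[1] then
+          redis.call("SET", KEYS[1], ARGV[2], "PX", ARGV[3])
+          return 1
+        end
+        return 0
+      `;
+      void registerIfClaimOwnedLuaSketch; // illustration only, not exercised by this test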
+ + const { createRedisClient } = await import("@internal/redis"); + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + // Create a separate Redis client to simulate "another server" modifying keys directly + const simulatedServerRedis = createRedisClient({ + ...redisOptions, + keyPrefix: `${redisOptions.keyPrefix ?? ""}debounce:`, + }); + + try { + const taskIdentifier = "test-task"; + const debounceKey = "race-test-key"; + const environmentId = authenticatedEnvironment.id; + const delayUntil = new Date(Date.now() + 60_000); + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // Construct the Redis key (same format as DebounceSystem.getDebounceRedisKey) + const redisKey = `${environmentId}:${taskIdentifier}:${debounceKey}`; + + // Step 1: Server A claims the key with claimId-A + const claimIdA = "claim-server-A"; + await simulatedServerRedis.set(redisKey, `pending:${claimIdA}`, "PX", 60_000); + + // Step 2 & 3: Simulate Server B claiming and registering (after A's claim "expires") + // In reality, this simulates the race where B's claim overwrites A's pending claim + const runIdB = "run_server_B"; + await simulatedServerRedis.set(redisKey, runIdB, "PX", 60_000); + + // Verify Server B's registration is in place + const valueAfterB = await simulatedServerRedis.get(redisKey); + expect(valueAfterB).toBe(runIdB); + + // Step 4: Server A attempts to register with its stale claimId-A + // This should FAIL because the key no longer contains "pending:claim-server-A" + const runIdA = "run_server_A"; + const registered = await engine.debounceSystem.registerDebouncedRun({ + runId: runIdA, + environmentId, + taskIdentifier, + debounceKey, + delayUntil, + claimId: claimIdA, // Stale claim ID + }); + + // Step 5: Verify Server A's registration failed + expect(registered).toBe(false); + + // Step 6: Verify Redis still contains runId-B (not overwritten by Server A) + const finalValue = await simulatedServerRedis.get(redisKey); + expect(finalValue).toBe(runIdB); + } finally { + await simulatedServerRedis.quit(); + await engine.quit(); + } + } + ); + + containerTest( + "waitForExistingRun: returns claimId when key expires during wait", + async ({ prisma, redisOptions }) => { + // This test verifies the fix for the race condition where waitForExistingRun + // returns { status: "new" } without a claimId. Without the fix: + // 1. Server A's pending claim expires + // 2. Server B's waitForExistingRun detects key is gone, returns { status: "new" } (no claimId) + // 3. Server C atomically claims the key and registers runId-C + // 4. Server B calls registerDebouncedRun without claimId, does plain SET, overwrites runId-C + // + // With the fix, step 2 atomically claims the key before returning, preventing step 4's overwrite. 
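+      // For illustration, the "atomically claim before returning new" behaviour verified here
+      // boils down to a single SET ... NX call. A sketch under assumptions (variable and helper
+      // names are made up; handleDebounce's internals may differ):
+      //
+      //   const claimId = crypto.randomUUID();
+      //   const claimed = await redis.set(redisKey, `pending:${claimId}`, "PX", ttlMs, "NX");
+      //   if (claimed === "OK") return { status: "new", claimId };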
+ + const { createRedisClient } = await import("@internal/redis"); + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + // Create a separate Redis client to simulate "another server" modifying keys directly + const simulatedServerRedis = createRedisClient({ + ...redisOptions, + keyPrefix: `${redisOptions.keyPrefix ?? ""}debounce:`, + }); + + try { + const taskIdentifier = "test-task"; + const debounceKey = "wait-race-test-key"; + const environmentId = authenticatedEnvironment.id; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // Construct the Redis key (same format as DebounceSystem.getDebounceRedisKey) + const redisKey = `${environmentId}:${taskIdentifier}:${debounceKey}`; + + // Step 1: Server A claims the key with a pending claim + const claimIdA = "claim-server-A"; + await simulatedServerRedis.set(redisKey, `pending:${claimIdA}`, "PX", 60_000); + + // Step 2: Delete the key to simulate Server A's claim expiring + await simulatedServerRedis.del(redisKey); + + // Step 3: Server B calls handleDebounce - since key is gone, it should atomically claim + const debounceResult = await engine.debounceSystem.handleDebounce({ + environmentId, + taskIdentifier, + debounce: { + key: debounceKey, + delay: "5s", + }, + }); + + // Step 4: Verify result is { status: "new" } WITH a claimId + expect(debounceResult.status).toBe("new"); + if (debounceResult.status === "new") { + expect(debounceResult.claimId).toBeDefined(); + expect(typeof debounceResult.claimId).toBe("string"); + expect(debounceResult.claimId!.length).toBeGreaterThan(0); + + // Step 5: Verify the key now contains Server B's pending claim + const valueAfterB = await simulatedServerRedis.get(redisKey); + expect(valueAfterB).toBe(`pending:${debounceResult.claimId}`); + + // Step 6: Server C tries to claim the same key - should fail + const claimIdC = "claim-server-C"; + const claimResultC = await simulatedServerRedis.set( + redisKey, + `pending:${claimIdC}`, + "PX", + 60_000, + "NX" + ); + expect(claimResultC).toBeNull(); // NX fails because key exists + + // Step 7: Server B registers its run using its claimId + const runIdB = "run_server_B"; + const delayUntil = new Date(Date.now() + 60_000); + const registered = await engine.debounceSystem.registerDebouncedRun({ + runId: runIdB, + environmentId, + taskIdentifier, + debounceKey, + delayUntil, + claimId: debounceResult.claimId, + }); + + // Step 8: Verify Server B's registration succeeded + expect(registered).toBe(true); + + // Step 9: Verify Redis contains Server B's run ID + const finalValue = await simulatedServerRedis.get(redisKey); + expect(finalValue).toBe(runIdB); + } + } finally { + await simulatedServerRedis.quit(); + await engine.quit(); + } + } + ); +}); + diff --git a/internal-packages/run-engine/src/engine/tests/delays.test.ts b/internal-packages/run-engine/src/engine/tests/delays.test.ts index 162967e9e9..8a93aa1ad1 100644 --- 
a/internal-packages/run-engine/src/engine/tests/delays.test.ts +++ b/internal-packages/run-engine/src/engine/tests/delays.test.ts @@ -73,10 +73,10 @@ describe("RunEngine delays", () => { prisma ); - //should be created but not queued yet + //should be delayed but not queued yet const executionData = await engine.getRunExecutionData({ runId: run.id }); assertNonNullable(executionData); - expect(executionData.snapshot.executionStatus).toBe("RUN_CREATED"); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); //wait for 1 seconds await setTimeout(1_000); @@ -155,10 +155,10 @@ describe("RunEngine delays", () => { prisma ); - //should be created but not queued yet + //should be delayed but not queued yet const executionData = await engine.getRunExecutionData({ runId: run.id }); assertNonNullable(executionData); - expect(executionData.snapshot.executionStatus).toBe("RUN_CREATED"); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); const rescheduleTo = new Date(Date.now() + 1_500); const updatedRun = await engine.rescheduleDelayedRun({ @@ -170,10 +170,10 @@ describe("RunEngine delays", () => { //wait so the initial delay passes await setTimeout(1_000); - //should still be created + //should still be delayed (rescheduled) const executionData2 = await engine.getRunExecutionData({ runId: run.id }); assertNonNullable(executionData2); - expect(executionData2.snapshot.executionStatus).toBe("RUN_CREATED"); + expect(executionData2.snapshot.executionStatus).toBe("DELAYED"); //wait so the updated delay passes await setTimeout(1_750); @@ -253,10 +253,10 @@ describe("RunEngine delays", () => { prisma ); - //should be created but not queued yet + //should be delayed but not queued yet const executionData = await engine.getRunExecutionData({ runId: run.id }); assertNonNullable(executionData); - expect(executionData.snapshot.executionStatus).toBe("RUN_CREATED"); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); expect(run.status).toBe("DELAYED"); //wait for 1 seconds @@ -356,10 +356,10 @@ describe("RunEngine delays", () => { prisma ); - //verify it's created but not queued + //verify it's delayed but not queued const executionData = await engine.getRunExecutionData({ runId: run.id }); assertNonNullable(executionData); - expect(executionData.snapshot.executionStatus).toBe("RUN_CREATED"); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); expect(run.status).toBe("DELAYED"); //cancel the run @@ -401,4 +401,110 @@ describe("RunEngine delays", () => { await engine.quit(); } }); + + containerTest( + "enqueueDelayedRun respects rescheduled delayUntil", + async ({ prisma, redisOptions }) => { + // This test verifies the race condition fix where if delayUntil is updated + // (e.g., by debounce reschedule) while the worker job is executing, + // the run should NOT be enqueued at the original time. + // + // The race condition occurs when: + // 1. Worker job is scheduled for T1 + // 2. rescheduleDelayedRun updates delayUntil to T2 in DB + // 3. worker.reschedule() tries to update the job, but it's already dequeued + // 4. Original worker job fires and calls enqueueDelayedRun + // + // Without the fix: Run would be enqueued at T1 (wrong!) 
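+      // (The guard this test exercises, added to enqueueDelayedRun earlier in this patch, is roughly:
+      //    if (run.delayUntil && run.delayUntil > new Date()) { return; }
+      // )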
+ // With the fix: enqueueDelayedRun checks delayUntil > now and skips + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // Create a delayed run with a short delay (300ms) + const run = await engine.trigger( + { + number: 1, + friendlyId: "run_1235", + environment: authenticatedEnvironment, + taskIdentifier, + payload: "{}", + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 300), + }, + prisma + ); + + // Verify it's delayed + const executionData = await engine.getRunExecutionData({ runId: run.id }); + assertNonNullable(executionData); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); + + // Simulate race condition: directly update delayUntil in the database to a future time + // This simulates what happens when rescheduleDelayedRun updates the DB but the + // worker.reschedule() call doesn't affect the already-dequeued job + const newDelayUntil = new Date(Date.now() + 10_000); // 10 seconds in the future + await prisma.taskRun.update({ + where: { id: run.id }, + data: { delayUntil: newDelayUntil }, + }); + + // Wait past the original delay (500ms) so the worker job fires + await setTimeout(500); + + // KEY ASSERTION: The run should still be DELAYED because the fix checks delayUntil > now + // Without the fix, the run would be QUEUED here (wrong!) + const executionData2 = await engine.getRunExecutionData({ runId: run.id }); + assertNonNullable(executionData2); + expect(executionData2.snapshot.executionStatus).toBe("DELAYED"); + + // Note: We don't test the run eventually becoming QUEUED here because we only + // updated the DB (simulating the race). In the real scenario, rescheduleDelayedRun + // would also reschedule the worker job to fire at the new delayUntil time. + } finally { + await engine.quit(); + } + } + ); }); diff --git a/internal-packages/run-engine/src/engine/types.ts b/internal-packages/run-engine/src/engine/types.ts index bdc6da4152..3b2ae8c9a1 100644 --- a/internal-packages/run-engine/src/engine/types.ts +++ b/internal-packages/run-engine/src/engine/types.ts @@ -7,7 +7,7 @@ import { RetryOptions, TriggerTraceContext, } from "@trigger.dev/core/v3"; -import { PrismaClient, PrismaReplicaClient } from "@trigger.dev/database"; +import { PrismaClient, PrismaReplicaClient, TaskRun, Waitpoint } from "@trigger.dev/database"; import { Worker, type WorkerConcurrencyOptions, @@ -83,6 +83,11 @@ export type RunEngineOptions = { /** Optional global rate limiter to limit processing across all consumers */ globalRateLimiter?: GlobalRateLimiter; }; + debounce?: { + redis?: RedisOptions; + /** Maximum duration in milliseconds that a run can be debounced. 
Default: 1 hour */ + maxDebounceDurationMs?: number; + }; /** If not set then checkpoints won't ever be used */ retryWarmStartThresholdMs?: number; heartbeatTimeoutsMs?: Partial; @@ -164,6 +169,21 @@ export type TriggerParams = { bulkActionId?: string; planType?: string; realtimeStreamsVersion?: string; + debounce?: { + key: string; + delay: string; + mode?: "leading" | "trailing"; + }; + /** + * Called when a run is debounced (existing delayed run found with triggerAndWait). + * Return spanIdToComplete to enable span closing when the run completes. + * This allows the webapp to create a trace span for the debounced trigger. + */ + onDebounced?: (params: { + existingRun: TaskRun; + waitpoint: Waitpoint; + debounceKey: string; + }) => Promise; }; export type EngineWorker = Worker; diff --git a/packages/cli-v3/src/build/manifests.ts b/packages/cli-v3/src/build/manifests.ts index 8b1da98ceb..f1188233a5 100644 --- a/packages/cli-v3/src/build/manifests.ts +++ b/packages/cli-v3/src/build/manifests.ts @@ -54,7 +54,7 @@ export async function copyManifestToDir( */ async function computeFileHash(filePath: string): Promise { const contents = await readFile(filePath); - return createHash("sha256").update(contents).digest("hex").slice(0, 16); + return createHash("sha256").update(contents as Uint8Array).digest("hex").slice(0, 16); } /** diff --git a/packages/cli-v3/src/entryPoints/dev-run-controller.ts b/packages/cli-v3/src/entryPoints/dev-run-controller.ts index e5578567b4..5db271f9d6 100644 --- a/packages/cli-v3/src/entryPoints/dev-run-controller.ts +++ b/packages/cli-v3/src/entryPoints/dev-run-controller.ts @@ -428,7 +428,8 @@ export class DevRunController { } case "RUN_CREATED": case "QUEUED_EXECUTING": - case "QUEUED": { + case "QUEUED": + case "DELAYED": { logger.debug("Status change not handled", { status: snapshot.executionStatus }); return; } diff --git a/packages/cli-v3/src/entryPoints/managed/execution.ts b/packages/cli-v3/src/entryPoints/managed/execution.ts index 2dd3e6838e..ec927627f3 100644 --- a/packages/cli-v3/src/entryPoints/managed/execution.ts +++ b/packages/cli-v3/src/entryPoints/managed/execution.ts @@ -372,9 +372,10 @@ export class RunExecution { return; } - case "RUN_CREATED": { + case "RUN_CREATED": + case "DELAYED": { this.sendDebugLog( - "aborting execution: invalid status change: RUN_CREATED", + "aborting execution: invalid status change: RUN_CREATED or DELAYED", snapshotMetadata ); diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index 1782683969..5e5fff18ea 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -203,6 +203,13 @@ export const TriggerTaskRequestBody = z.object({ priority: z.number().optional(), bulkActionId: z.string().optional(), region: z.string().optional(), + debounce: z + .object({ + key: z.string().max(512), + delay: z.string(), + mode: z.enum(["leading", "trailing"]).optional(), + }) + .optional(), }) .optional(), }); @@ -251,6 +258,13 @@ export const BatchTriggerTaskItem = z.object({ ttl: z.string().or(z.number().nonnegative().int()).optional(), priority: z.number().optional(), region: z.string().optional(), + debounce: z + .object({ + key: z.string().max(512), + delay: z.string(), + mode: z.enum(["leading", "trailing"]).optional(), + }) + .optional(), }) .optional(), }); diff --git a/packages/core/src/v3/schemas/runEngine.ts b/packages/core/src/v3/schemas/runEngine.ts index 376a8522de..e4057e7ca6 100644 --- a/packages/core/src/v3/schemas/runEngine.ts +++ 
b/packages/core/src/v3/schemas/runEngine.ts @@ -13,6 +13,7 @@ export const TaskRunExecutionStatus = { SUSPENDED: "SUSPENDED", PENDING_CANCEL: "PENDING_CANCEL", FINISHED: "FINISHED", + DELAYED: "DELAYED", } satisfies Enum; export type TaskRunExecutionStatus = diff --git a/packages/core/src/v3/types/tasks.ts b/packages/core/src/v3/types/tasks.ts index 857f0cc2f3..f463b20f49 100644 --- a/packages/core/src/v3/types/tasks.ts +++ b/packages/core/src/v3/types/tasks.ts @@ -896,6 +896,56 @@ export type TriggerOptions = { * ``` */ region?: string; + + /** + * Debounce settings for consolidating multiple trigger calls into a single delayed run. + * + * When a run with the same debounce key already exists in the delayed state, subsequent triggers + * "push" the existing run's execution time later rather than creating new runs. + * + * The debounce key is scoped to the task identifier, so different tasks can use the same key without conflicts. + * + * @example + * + * ```ts + * // Leading mode (default): executes with the FIRST payload + * await myTask.trigger({ some: "data1" }, { debounce: { key: "user-123", delay: "5s" } }); + * await myTask.trigger({ some: "data2" }, { debounce: { key: "user-123", delay: "5s" } }); + * // After 5 seconds, runs with { some: "data1" } + * + * // Trailing mode: executes with the LAST payload + * await myTask.trigger({ some: "data1" }, { debounce: { key: "user-123", delay: "5s", mode: "trailing" } }); + * await myTask.trigger({ some: "data2" }, { debounce: { key: "user-123", delay: "5s", mode: "trailing" } }); + * // After 5 seconds, runs with { some: "data2" } + * ``` + */ + debounce?: { + /** + * Unique key scoped to the task identifier. Runs with the same key will be debounced together. + * Maximum length is 512 characters. + */ + key: string; + /** + * Duration string specifying how long to delay the run. If another trigger with the same key + * occurs within this duration, the delay is extended. + * + * Supported formats: `{number}s` (seconds), `{number}m` (minutes), `{number}h` (hours), + * `{number}d` (days), `{number}w` (weeks). Minimum delay is 1 second. + * + * @example "1s", "5s", "1m", "30m", "1h" + */ + delay: string; + /** + * Controls which trigger's data is used when the debounced run finally executes. + * + * - `"leading"` (default): Use data from the first trigger (payload, metadata, tags, etc.) + * - `"trailing"`: Use data from the last trigger. Each subsequent trigger updates the run's + * payload, metadata, tags, maxAttempts, maxDuration, and machine preset. + * + * @default "leading" + */ + mode?: "leading" | "trailing"; + }; }; export type TriggerAndWaitOptions = Omit; diff --git a/packages/core/src/v3/zodSocket.ts b/packages/core/src/v3/zodSocket.ts index 160620c42c..5ec1f179a4 100644 --- a/packages/core/src/v3/zodSocket.ts +++ b/packages/core/src/v3/zodSocket.ts @@ -100,7 +100,7 @@ export class ZodSocketMessageHandler( priority: item.options?.priority, region: item.options?.region, lockToVersion: item.options?.version ?? getEnvVar("TRIGGER_VERSION"), + debounce: item.options?.debounce, }, }; }) @@ -904,6 +905,7 @@ export async function batchTriggerByIdAndWait( machine: item.options?.machine, priority: item.options?.priority, region: item.options?.region, + debounce: item.options?.debounce, }, }; }) @@ -1163,6 +1165,7 @@ export async function batchTriggerTasks( priority: item.options?.priority, region: item.options?.region, lockToVersion: item.options?.version ?? 
getEnvVar("TRIGGER_VERSION"), + debounce: item.options?.debounce, }, }; }) @@ -1423,6 +1426,7 @@ export async function batchTriggerAndWaitTasks( priority: item.options?.priority, region: item.options?.region, lockToVersion: item.options?.version ?? getEnvVar("TRIGGER_VERSION"), + debounce: item.options?.debounce, }, }; } @@ -1809,6 +1814,7 @@ async function* transformBatchItemsStreamForWait( machine: item.options?.machine, priority: item.options?.priority, region: item.options?.region, + debounce: item.options?.debounce, }, }; } @@ -1859,6 +1865,7 @@ async function* transformBatchByTaskItemsStream( priority: item.options?.priority, region: item.options?.region, lockToVersion: item.options?.version ?? getEnvVar("TRIGGER_VERSION"), + debounce: item.options?.debounce, }, }; } @@ -2013,6 +2022,7 @@ async function* transformSingleTaskBatchItemsStreamForWait( machine: item.options?.machine, priority: item.options?.priority, region: item.options?.region, + debounce: item.options?.debounce, }, }; } @@ -2054,6 +2064,7 @@ async function trigger_internal( priority: options?.priority, region: options?.region, lockToVersion: options?.version ?? getEnvVar("TRIGGER_VERSION"), + debounce: options?.debounce, }, }, { @@ -2285,6 +2296,7 @@ async function triggerAndWait_internal }) => { + logger.info("Processing data update", { payload }); + + // Simulate some processing work + await wait.for({ seconds: 1 }); + + logger.info("Data update processed successfully", { userId: payload.userId }); + + return { + processed: true, + userId: payload.userId, + timestamp: new Date().toISOString(), + }; + }, +}); + +/** + * Example 1: Basic Debounce + * + * This demonstrates how debounce works with rapid triggers. + * When triggered multiple times with the same key within the delay period, + * only one run will execute (with the first payload). + * + * Trigger this task multiple times rapidly with the same debounceKey to see + * how only one run is created. + */ +export const basicDebounceExample = task({ + id: "basic-debounce-example", + run: async (payload: { value: string; debounceKey: string }) => { + logger.info("Starting basic debounce example", { payload }); + + // Trigger processDataUpdate with debounce + // If this task is triggered multiple times within 5 seconds with the same + // debounceKey, only one processDataUpdate run will be created + const handle = await processDataUpdate.trigger( + { + userId: payload.debounceKey, + data: { value: payload.value, triggeredAt: new Date().toISOString() }, + }, + { + debounce: { + key: payload.debounceKey, + delay: "5s", + }, + } + ); + + logger.info("Triggered processDataUpdate with debounce", { + runId: handle.id, + debounceKey: payload.debounceKey, + }); + + return { triggeredRunId: handle.id }; + }, +}); + +/** + * Demonstration: Rapid Debounce Triggering + * + * This task demonstrates debounce in action by triggering processDataUpdate + * multiple times rapidly with the same debounce key. Despite 5 triggers, + * only ONE processDataUpdate run will be created. + * + * Run this task and watch the logs - you'll see: + * - 5 "Triggering attempt" logs + * - All 5 return the SAME run ID + * - Only 1 processDataUpdate run actually executes + */ +export const demonstrateDebounce = task({ + id: "demonstrate-debounce", + run: async (payload: { debounceKey?: string }) => { + const key = payload.debounceKey ?? 
"demo-key"; + + logger.info("Starting debounce demonstration", { debounceKey: key }); + logger.info("Will trigger processDataUpdate 5 times rapidly with the same debounce key"); + + const handles: string[] = []; + + // Trigger 5 times rapidly - all should return the same run + for (let i = 1; i <= 5; i++) { + logger.info(`Triggering attempt ${i}/5`, { attempt: i }); + + const handle = await processDataUpdate.trigger( + { + userId: key, + data: { + attempt: i, + triggeredAt: new Date().toISOString(), + message: `This is trigger attempt ${i}`, + }, + }, + { + debounce: { + key: key, + delay: "5s", + }, + } + ); + + handles.push(handle.id); + logger.info(`Attempt ${i} returned run ID: ${handle.id}`, { + attempt: i, + runId: handle.id, + }); + + // Small delay between triggers (but still within debounce window) + await new Promise((resolve) => setTimeout(resolve, 200)); + } + + // Check if all handles are the same (they should be!) + const uniqueHandles = [...new Set(handles)]; + const allSameRun = uniqueHandles.length === 1; + + logger.info("Debounce demonstration complete", { + totalTriggers: 5, + uniqueRuns: uniqueHandles.length, + allSameRun, + runIds: handles, + }); + + if (allSameRun) { + logger.info("SUCCESS: All 5 triggers returned the same run ID - debounce is working!"); + } else { + logger.warn("UNEXPECTED: Multiple runs were created", { uniqueHandles }); + } + + return { + debounceKey: key, + totalTriggers: 5, + uniqueRunsCreated: uniqueHandles.length, + allSameRun, + runId: uniqueHandles[0], + }; + }, +}); + +/** + * Demonstration: Debounce with triggerAndWait + * + * This shows how multiple parent tasks can wait on the same debounced child. + * Each parent task calls triggerAndWait with the same debounce key. + * All parents will be blocked by and receive the result from the SAME child run. + * + * To test this: + * 1. Run "demonstrate-debounce-trigger-and-wait-orchestrator" + * 2. Watch as 3 parent runs are created + * 3. All 3 parents will wait for the SAME debounced child run + * 4. 
When the child completes, all 3 parents complete with the same result + */ + +// Parent task that calls triggerAndWait with debounce +export const debounceTriggerAndWaitParent = task({ + id: "debounce-trigger-and-wait-parent", + run: async (payload: { parentNumber: number; debounceKey: string }) => { + logger.info(`Parent ${payload.parentNumber}: Starting`, { + parentNumber: payload.parentNumber, + debounceKey: payload.debounceKey, + }); + + logger.info(`Parent ${payload.parentNumber}: Calling triggerAndWait with debounce`); + + // This will be debounced - if another parent calls with the same key, + // they'll both wait for the same child run + const result = await processDataUpdate.triggerAndWait( + { + userId: payload.debounceKey, + data: { + parentNumber: payload.parentNumber, + triggeredAt: new Date().toISOString(), + }, + }, + { + debounce: { + key: payload.debounceKey, + delay: "5s", + }, + } + ); + + logger.info(`Parent ${payload.parentNumber}: Got result from child`, { result }); + + if (result.ok) { + return { + parentNumber: payload.parentNumber, + childOutput: result.output, + success: true, + }; + } else { + return { + parentNumber: payload.parentNumber, + error: "Child task failed", + success: false, + }; + } + }, +}); + +// Orchestrator that triggers multiple parents (without waiting) +export const demonstrateDebounceTriggerAndWaitOrchestrator = task({ + id: "demonstrate-debounce-trigger-and-wait-orchestrator", + run: async (payload: { debounceKey?: string; parentCount?: number }) => { + const key = payload.debounceKey ?? "wait-demo-key"; + const count = payload.parentCount ?? 3; + + logger.info("Starting debounce triggerAndWait demonstration", { + debounceKey: key, + parentCount: count, + }); + + logger.info( + `Triggering ${count} parent tasks - each will call triggerAndWait with the same debounce key` + ); + logger.info("All parents should be blocked by the SAME debounced child run"); + + const handles: string[] = []; + + // Trigger multiple parent tasks as fast as possible (no delay) to maximize race condition chance + for (let i = 1; i <= count; i++) { + const handle = await debounceTriggerAndWaitParent.trigger({ + parentNumber: i, + debounceKey: key, + }); + + logger.info(`Triggered parent ${i}`, { runId: handle.id }); + handles.push(handle.id); + } + + logger.info("All parent tasks triggered", { + parentRunIds: handles, + debounceKey: key, + }); + + logger.info( + "Watch the parent runs - they should all complete around the same time when the single debounced child finishes" + ); + + return { + debounceKey: key, + parentCount: count, + parentRunIds: handles, + message: `Triggered ${count} parent tasks. They will all wait for the same debounced child.`, + }; + }, +}); + +/** + * Example 2: User Activity Debouncing + * + * A real-world use case: debouncing user activity updates. + * When a user performs multiple actions in quick succession, + * we only want to process the final state after they've stopped. 
+ + * Common use cases: + * - Search-as-you-type + * - Form auto-save + * - Activity logging + * - Rate limiting user actions + */ +export const syncUserActivity = task({ + id: "sync-user-activity", + run: async (payload: { + userId: string; + activityType: string; + details: Record<string, unknown>; + }) => { + logger.info("Syncing user activity", { payload }); + + // Simulate syncing to external service + await wait.for({ seconds: 2 }); + + logger.info("User activity synced", { + userId: payload.userId, + activityType: payload.activityType, + }); + + return { + synced: true, + syncedAt: new Date().toISOString(), + }; + }, +}); + +export const trackUserActivity = task({ + id: "track-user-activity", + run: async (payload: { userId: string; action: string; metadata?: Record<string, unknown> }) => { + logger.info("Tracking user activity", { payload }); + + // Debounce per user - if the same user performs multiple actions, + // only sync once after 10 seconds of inactivity + const handle = await syncUserActivity.trigger( + { + userId: payload.userId, + activityType: payload.action, + details: { + ...payload.metadata, + lastAction: payload.action, + lastActionAt: new Date().toISOString(), + }, + }, + { + debounce: { + // Key is scoped to the user, so each user has their own debounce window + key: `user-${payload.userId}`, + delay: "10s", + }, + } + ); + + logger.info("User activity tracked (debounced)", { + userId: payload.userId, + runId: handle.id, + }); + + return { runId: handle.id }; + }, +}); + +/** + * Example 3: Document Auto-Save with Debounce + * + * Simulates a document editing system where saves are debounced + * to avoid excessive save operations during rapid editing. + */ +export const saveDocument = task({ + id: "save-document", + run: async (payload: { documentId: string; content: string; version: number }) => { + logger.info("Saving document", { + documentId: payload.documentId, + contentLength: payload.content.length, + version: payload.version, + }); + + // Simulate save operation + await wait.for({ seconds: 1 }); + + logger.info("Document saved successfully", { + documentId: payload.documentId, + savedAt: new Date().toISOString(), + }); + + return { + saved: true, + documentId: payload.documentId, + version: payload.version, + savedAt: new Date().toISOString(), + }; + }, +}); + +export const onDocumentEdit = task({ + id: "on-document-edit", + run: async (payload: { documentId: string; content: string; editorId: string }) => { + logger.info("Document edited", { + documentId: payload.documentId, + editorId: payload.editorId, + }); + + // Debounce saves per document - save only after 3 seconds of no edits + const handle = await saveDocument.trigger( + { + documentId: payload.documentId, + content: payload.content, + version: Date.now(), + }, + { + debounce: { + // Key is scoped to the document, so each document has its own debounce + key: `doc-${payload.documentId}`, + delay: "3s", + }, + } + ); + + return { + acknowledged: true, + pendingSaveRunId: handle.id, + }; + }, +}); + +/** + * Example 4: Webhook Consolidation + * + * When receiving many webhooks from an external service, + * debounce to consolidate them into fewer processing runs. 
+ */ +export const processWebhookBatch = task({ + id: "process-webhook-batch", + run: async (payload: { source: string; eventType: string; data: unknown }) => { + logger.info("Processing webhook batch", { + source: payload.source, + eventType: payload.eventType, + }); + + // Process the webhook data + await wait.for({ seconds: 2 }); + + logger.info("Webhook batch processed", { + source: payload.source, + eventType: payload.eventType, + }); + + return { + processed: true, + processedAt: new Date().toISOString(), + }; + }, +}); + +export const handleWebhook = task({ + id: "handle-webhook", + run: async (payload: { source: string; eventType: string; webhookId: string; data: unknown }) => { + logger.info("Received webhook", { + source: payload.source, + eventType: payload.eventType, + webhookId: payload.webhookId, + }); + + // Debounce webhooks from the same source and event type + // This consolidates rapid webhook bursts into single processing runs + const handle = await processWebhookBatch.trigger( + { + source: payload.source, + eventType: payload.eventType, + data: payload.data, + }, + { + debounce: { + key: `webhook-${payload.source}-${payload.eventType}`, + delay: "2s", + }, + } + ); + + logger.info("Webhook queued for processing (debounced)", { + webhookId: payload.webhookId, + runId: handle.id, + }); + + return { + acknowledged: true, + processingRunId: handle.id, + }; + }, +}); + +/** + * Example 5: Debounce with triggerAndWait + * + * When using triggerAndWait with debounce, the parent task will be blocked + * by the debounced child run. If another parent triggers with the same + * debounce key, it will also be blocked by the SAME child run. + */ +export const debouncedChildTask = task({ + id: "debounced-child-task", + run: async (payload: { key: string; value: string }) => { + logger.info("Debounced child task executing", { payload }); + + await wait.for({ seconds: 3 }); + + logger.info("Debounced child task completed", { key: payload.key }); + + return { + result: `Processed: ${payload.value}`, + completedAt: new Date().toISOString(), + }; + }, +}); + +export const parentWithDebouncedChild = task({ + id: "parent-with-debounced-child", + run: async (payload: { parentId: string; debounceKey: string; data: string }) => { + logger.info("Parent task starting", { parentId: payload.parentId }); + + // triggerAndWait with debounce - the parent will wait for the debounced child + // If another parent triggers with the same debounce key, they'll both wait + // for the same child run + const result = await debouncedChildTask.triggerAndWait( + { + key: payload.debounceKey, + value: payload.data, + }, + { + debounce: { + key: payload.debounceKey, + delay: "5s", + }, + } + ); + + logger.info("Parent task completed", { + parentId: payload.parentId, + childResult: result, + }); + + if (result.ok) { + return { + parentId: payload.parentId, + childOutput: result.output, + }; + } else { + return { + parentId: payload.parentId, + error: "Child task failed", + }; + } + }, +}); + +/** + * Example 6: Different Delay Durations + * + * Shows various delay duration formats supported by debounce. 
+ */ +export const shortDebounce = task({ + id: "short-debounce", + run: async (payload: { key: string }) => { + logger.info("Short debounce task (1s)", { key: payload.key }); + return { key: payload.key, delay: "1s" }; + }, +}); + +export const mediumDebounce = task({ + id: "medium-debounce", + run: async (payload: { key: string }) => { + logger.info("Medium debounce task (5s)", { key: payload.key }); + return { key: payload.key, delay: "5s" }; + }, +}); + +export const longDebounce = task({ + id: "long-debounce", + run: async (payload: { key: string }) => { + logger.info("Long debounce task (1m)", { key: payload.key }); + return { key: payload.key, delay: "1m" }; + }, +}); + +export const testDifferentDelays = task({ + id: "test-different-delays", + run: async (payload: { key: string }) => { + logger.info("Testing different debounce delays", { key: payload.key }); + + // 1 second debounce - good for rapid UI updates + await shortDebounce.trigger( + { key: `${payload.key}-short` }, + { debounce: { key: `${payload.key}-short`, delay: "1s" } } + ); + + // 5 second debounce - good for user input + await mediumDebounce.trigger( + { key: `${payload.key}-medium` }, + { debounce: { key: `${payload.key}-medium`, delay: "5s" } } + ); + + // 1 minute debounce - good for batch processing + await longDebounce.trigger( + { key: `${payload.key}-long` }, + { debounce: { key: `${payload.key}-long`, delay: "1m" } } + ); + + return { triggered: true }; + }, +}); + +/** + * Example 7: Batch Trigger with Debounce + * + * Demonstrates using debounce with batchTrigger. + * Each item in the batch can have its own debounce key and delay. + * Items with the same debounce key will be consolidated into a single run. + */ +export const batchItemTask = task({ + id: "batch-item-task", + run: async (payload: { itemId: string; data: string }) => { + logger.info("Processing batch item", { payload }); + + await wait.for({ seconds: 1 }); + + logger.info("Batch item processed", { itemId: payload.itemId }); + + return { + processed: true, + itemId: payload.itemId, + processedAt: new Date().toISOString(), + }; + }, +}); + +/** + * Demonstrates batch.trigger() with debounce options on individual items. + * + * This shows how you can: + * - Use different debounce keys for different items + * - Items with the same debounce key will be consolidated + * - Items with different keys will create separate runs + * + * Run this task and watch: + * - Items 1 and 3 share debounce key "group-a" -> ONE run + * - Items 2 and 4 share debounce key "group-b" -> ONE run + * - Item 5 has unique key "group-c" -> ONE run + * - Total: 3 runs instead of 5 (but batch shows 5 items) + * + * Note: The batch itself still reports 5 items, but only 3 actual task runs + * will execute due to debouncing. + */ +export const demonstrateBatchDebounce = task({ + id: "demonstrate-batch-debounce", + run: async (payload: { prefix?: string }) => { + const prefix = payload.prefix ?? 
"batch-demo"; + + logger.info("Starting batch debounce demonstration"); + logger.info("Will trigger 5 items with 3 different debounce keys"); + logger.info( + "Items 1&3 share key 'group-a', items 2&4 share key 'group-b', item 5 has key 'group-c'" + ); + + // Use batch.trigger with debounce options on each item + const result = await batch.trigger([ + { + id: "batch-item-task", + payload: { itemId: `${prefix}-1`, data: "First item in group A" }, + options: { + debounce: { key: `${prefix}-group-a`, delay: "5s" }, + }, + }, + { + id: "batch-item-task", + payload: { itemId: `${prefix}-2`, data: "First item in group B" }, + options: { + debounce: { key: `${prefix}-group-b`, delay: "5s" }, + }, + }, + { + id: "batch-item-task", + payload: { itemId: `${prefix}-3`, data: "Second item in group A (debounced)" }, + options: { + debounce: { key: `${prefix}-group-a`, delay: "5s" }, + }, + }, + { + id: "batch-item-task", + payload: { itemId: `${prefix}-4`, data: "Second item in group B (debounced)" }, + options: { + debounce: { key: `${prefix}-group-b`, delay: "5s" }, + }, + }, + { + id: "batch-item-task", + payload: { itemId: `${prefix}-5`, data: "Only item in group C" }, + options: { + debounce: { key: `${prefix}-group-c`, delay: "5s" }, + }, + }, + ]); + + logger.info("Batch debounce demonstration complete", { + batchId: result.batchId, + totalItemsInBatch: result.runCount, + note: "Check the dashboard - only 3 actual task runs should execute due to debouncing", + }); + + return { + batchId: result.batchId, + totalItemsInBatch: result.runCount, + expectedUniqueRuns: 3, + message: + "5 items submitted, but only 3 runs will execute: group-a (1 run), group-b (1 run), group-c (1 run)", + }; + }, +}); + +/** + * Demonstrates batchTrigger on a single task with debounce. + * + * Similar to batch.trigger but using myTask.batchTrigger() syntax. + * Each item can have its own debounce configuration. + * + * When all items share the same debounce key, only ONE run will execute. + */ +export const demonstrateSingleTaskBatchDebounce = task({ + id: "demonstrate-single-task-batch-debounce", + run: async (payload: { debounceKey?: string }) => { + const key = payload.debounceKey ?? 
"single-batch-demo"; + + logger.info("Starting single task batch debounce demonstration", { debounceKey: key }); + logger.info("Triggering 4 items with the SAME debounce key - only 1 run should execute"); + + // All items have the same debounce key, so they should all resolve to the same run + const result = await batchItemTask.batchTrigger([ + { + payload: { itemId: `${key}-1`, data: "Item 1" }, + options: { debounce: { key, delay: "5s" } }, + }, + { + payload: { itemId: `${key}-2`, data: "Item 2" }, + options: { debounce: { key, delay: "5s" } }, + }, + { + payload: { itemId: `${key}-3`, data: "Item 3" }, + options: { debounce: { key, delay: "5s" } }, + }, + { + payload: { itemId: `${key}-4`, data: "Item 4" }, + options: { debounce: { key, delay: "5s" } }, + }, + ]); + + logger.info("Single task batch debounce complete", { + batchId: result.batchId, + totalItemsInBatch: result.runCount, + debounceKey: key, + note: "All items share the same debounce key, so only 1 task run should execute", + }); + + return { + batchId: result.batchId, + totalItemsInBatch: result.runCount, + debounceKey: key, + expectedUniqueRuns: 1, + message: "4 items submitted with same debounce key - only 1 run will execute", + }; + }, +}); + +/** + * Example 8: Trailing Mode - Process Latest Data + * + * Trailing mode updates the run's payload (and other options) with each subsequent trigger. + * When the debounce window closes, the task runs with the LAST payload instead of the first. + * + * This is perfect for scenarios like: + * - Auto-saving the latest document state + * - Processing the final search query after typing stops + * - Aggregating real-time data and processing the latest snapshot + */ +export const processLatestData = task({ + id: "process-latest-data", + run: async (payload: { version: number; content: string; timestamp: string }) => { + logger.info("Processing latest data", { payload }); + + await wait.for({ seconds: 1 }); + + logger.info("Processed latest data", { + version: payload.version, + content: payload.content, + }); + + return { + processed: true, + version: payload.version, + content: payload.content, + processedAt: new Date().toISOString(), + }; + }, +}); + +/** + * Demonstrates trailing mode in action. + * + * This task triggers processLatestData 5 times rapidly with different payloads. + * With mode: "trailing", the run will execute with version 5 (the LAST payload), + * not version 1 (the first payload). + * + * Compare this to the demonstrateDebounce task which uses the default leading mode. + */ +export const demonstrateTrailingMode = task({ + id: "demonstrate-trailing-mode", + run: async (payload: { debounceKey?: string }) => { + const key = payload.debounceKey ?? 
"trailing-demo-key"; + + logger.info("Starting trailing mode demonstration", { debounceKey: key }); + logger.info("Will trigger processLatestData 5 times with mode: 'trailing'"); + logger.info("The run should execute with version 5 (the LAST payload)"); + + const handles: string[] = []; + + // Trigger 5 times rapidly - with trailing mode, the LAST payload wins + for (let i = 1; i <= 5; i++) { + logger.info(`Triggering version ${i}/5`, { version: i }); + + const handle = await processLatestData.trigger( + { + version: i, + content: `Content version ${i}`, + timestamp: new Date().toISOString(), + }, + { + debounce: { + key: key, + delay: "5s", + mode: "trailing", // Use trailing mode - LAST payload wins + }, + } + ); + + handles.push(handle.id); + logger.info(`Version ${i} returned run ID: ${handle.id}`, { + version: i, + runId: handle.id, + }); + + // Small delay between triggers + await new Promise((resolve) => setTimeout(resolve, 200)); + } + + // All handles should be the same run + const uniqueHandles = [...new Set(handles)]; + const allSameRun = uniqueHandles.length === 1; + + logger.info("Trailing mode demonstration complete", { + totalTriggers: 5, + uniqueRuns: uniqueHandles.length, + allSameRun, + note: "The run should execute with version 5 (the LAST payload)", + }); + + return { + debounceKey: key, + totalTriggers: 5, + uniqueRunsCreated: uniqueHandles.length, + allSameRun, + runId: uniqueHandles[0], + expectedPayloadVersion: 5, + message: + "With trailing mode, the run executes with the LAST payload (version 5), not the first", + }; + }, +}); + +/** + * Example 9: Document Auto-Save with Trailing Mode + * + * A practical example: when editing a document, you want to save the LATEST + * version after the user stops typing, not the first version. 
+ * + * Trailing mode is ideal for this because: + * - Each keystroke/edit triggers a save + * - Each trigger updates the pending run's payload to the latest content + * - When typing stops, the latest content is saved + */ +export const saveDocumentLatest = task({ + id: "save-document-latest", + run: async (payload: { + documentId: string; + content: string; + editCount: number; + lastEditedAt: string; + }) => { + logger.info("Saving document (latest version)", { + documentId: payload.documentId, + contentLength: payload.content.length, + editCount: payload.editCount, + }); + + // Simulate save operation + await wait.for({ seconds: 1 }); + + logger.info("Document saved successfully with latest content", { + documentId: payload.documentId, + editCount: payload.editCount, + savedAt: new Date().toISOString(), + }); + + return { + saved: true, + documentId: payload.documentId, + editCount: payload.editCount, + contentLength: payload.content.length, + savedAt: new Date().toISOString(), + }; + }, +}); + +export const onDocumentEditWithTrailing = task({ + id: "on-document-edit-with-trailing", + run: async (payload: { documentId: string; content: string; editorId: string }) => { + // Track how many edits we've made (for demonstration) + const editCount = payload.content.length; // Using content length as a simple proxy + + logger.info("Document edited (using trailing mode)", { + documentId: payload.documentId, + editorId: payload.editorId, + editCount, + }); + + // Use trailing mode - the LATEST content will be saved + const handle = await saveDocumentLatest.trigger( + { + documentId: payload.documentId, + content: payload.content, + editCount, + lastEditedAt: new Date().toISOString(), + }, + { + debounce: { + key: `doc-${payload.documentId}`, + delay: "3s", + mode: "trailing", // Save the LATEST content, not the first + }, + } + ); + + return { + acknowledged: true, + pendingSaveRunId: handle.id, + note: "With trailing mode, the latest content will be saved after 3 seconds of no edits", + }; + }, +}); + +/** + * Example 10: Leading vs Trailing Mode Comparison + * + * This task demonstrates the difference between leading and trailing modes + * by triggering two separate debounced tasks with the same data pattern. + * + * - Leading mode task: will process version 1 (first payload) + * - Trailing mode task: will process version 5 (last payload) + */ +export const processWithLeadingMode = task({ + id: "process-with-leading-mode", + run: async (payload: { version: number }) => { + logger.info("Leading mode: Processing data", { version: payload.version }); + return { mode: "leading", version: payload.version }; + }, +}); + +export const processWithTrailingMode = task({ + id: "process-with-trailing-mode", + run: async (payload: { version: number }) => { + logger.info("Trailing mode: Processing data", { version: payload.version }); + return { mode: "trailing", version: payload.version }; + }, +}); + +export const compareLeadingAndTrailing = task({ + id: "compare-leading-and-trailing", + run: async (payload: { prefix?: string }) => { + const prefix = payload.prefix ?? 
"compare"; + + logger.info("Starting leading vs trailing mode comparison"); + logger.info("Triggering both modes 5 times with versions 1-5"); + logger.info("Expected: Leading mode processes v1, Trailing mode processes v5"); + + // Trigger both modes 5 times + for (let i = 1; i <= 5; i++) { + // Leading mode (default) - will keep first payload + await processWithLeadingMode.trigger( + { version: i }, + { + debounce: { + key: `${prefix}-leading`, + delay: "5s", + // mode: "leading" is the default + }, + } + ); + + // Trailing mode - will update to latest payload + await processWithTrailingMode.trigger( + { version: i }, + { + debounce: { + key: `${prefix}-trailing`, + delay: "5s", + mode: "trailing", + }, + } + ); + + await new Promise((resolve) => setTimeout(resolve, 100)); + } + + logger.info("Comparison complete", { + leadingModeExpected: "version 1 (first payload)", + trailingModeExpected: "version 5 (last payload)", + }); + + return { + message: "Check the processWithLeadingMode and processWithTrailingMode runs", + leadingModeExpected: { version: 1 }, + trailingModeExpected: { version: 5 }, + }; + }, +}); + +/** + * Example 11: Trailing Mode with Metadata Updates + * + * Trailing mode also updates metadata, tags, maxAttempts, maxDuration, and machine. + * This example shows how metadata changes with each trigger. + */ +export const processWithMetadata = task({ + id: "process-with-metadata", + run: async (payload: { action: string }, { ctx }) => { + logger.info("Processing with metadata", { action: payload.action }); + + // The metadata will be from the LAST trigger when using trailing mode + logger.info("Run metadata reflects the latest trigger"); + + return { + action: payload.action, + processedAt: new Date().toISOString(), + }; + }, +}); + +export const demonstrateTrailingWithMetadata = task({ + id: "demonstrate-trailing-with-metadata", + run: async (payload: { debounceKey?: string }) => { + const key = payload.debounceKey ?? "metadata-trailing-demo"; + + logger.info("Demonstrating trailing mode with metadata updates"); + + const actions = ["created", "updated", "reviewed", "approved", "published"]; + + for (const action of actions) { + await processWithMetadata.trigger( + { action }, + { + debounce: { + key, + delay: "5s", + mode: "trailing", + }, + metadata: { + lastAction: action, + actionTimestamp: new Date().toISOString(), + actionIndex: actions.indexOf(action) + 1, + }, + } + ); + + await new Promise((resolve) => setTimeout(resolve, 100)); + } + + logger.info("Metadata trailing demonstration complete", { + expectedAction: "published", + expectedMetadata: { lastAction: "published", actionIndex: 5 }, + }); + + return { + debounceKey: key, + triggeredActions: actions, + expectedFinalAction: "published", + message: "The run will have metadata from the 'published' trigger (the last one)", + }; + }, +});