diff --git a/packages/utils/eslint.config.js b/packages/utils/eslint.config.js index ecb88a924..fb34f86af 100644 --- a/packages/utils/eslint.config.js +++ b/packages/utils/eslint.config.js @@ -18,6 +18,13 @@ export default tseslint.config( 'n/no-sync': 'off', }, }, + { + files: ['packages/utils/src/lib/profiler/trace-file-utils.ts'], + rules: { + // os.availableParallelism() is checked for existence before use, with fallback to os.cpus().length + 'n/no-unsupported-features/node-builtins': 'off', + }, + }, { files: ['**/*.json'], rules: { diff --git a/packages/utils/src/lib/process-id.ts b/packages/utils/src/lib/process-id.ts new file mode 100644 index 000000000..b18ec3e65 --- /dev/null +++ b/packages/utils/src/lib/process-id.ts @@ -0,0 +1,113 @@ +import process from 'node:process'; +import { threadId } from 'node:worker_threads'; + +/** + * Counter interface for generating sequential instance IDs. + * Encapsulates increment logic within the counter-implementation. + */ +export type Counter = { + /** + * Returns the next counter-value and increments the internal state. + * @returns The next counter-value + */ + next: () => number; +}; + +/** + * Base regex pattern for time ID format: yyyymmdd-hhmmss-ms + */ +export const TIME_ID_BASE = /\d{8}-\d{6}-\d{3}/; + +/** + * Regex patterns for validating process and instance ID formats. + * All patterns use strict anchors (^ and $) to ensure complete matches. + */ +export const ID_PATTERNS = Object.freeze({ + /** + * Time ID / Run ID format: yyyymmdd-hhmmss-ms + * Example: "20240101-120000-000" + * Used by: getUniqueTimeId() + */ + TIME_ID: new RegExp(`^${TIME_ID_BASE.source}$`), + /** + * Group ID format: alias by convention, semantically represents a group of instances + * Example: "20240101-120000-000" + * Used by: grouping related instances by time + */ + GROUP_ID: new RegExp(`^${TIME_ID_BASE.source}$`), + /** + * Process/Thread ID format: timeId-pid-threadId + * Example: "20240101-120000-000-12345-1" + * Used by: getUniqueProcessThreadId() + */ + PROCESS_THREAD_ID: new RegExp(`^${TIME_ID_BASE.source}-\\d+-\\d+$`), + /** + * Instance ID format: timeId.pid.threadId.counter + * Example: "20240101-120000-000.12345.1.1" + * Used by: getUniqueInstanceId() + */ + INSTANCE_ID: new RegExp(`^${TIME_ID_BASE.source}\\.\\d+\\.\\d+\\.\\d+$`), +} as const); + +/** + * Generates a unique run ID. + * This ID uniquely identifies a run/execution with a globally unique, sortable, human-readable date string. + * Format: yyyymmdd-hhmmss-ms + * Example: "20240101-120000-000" + * + * @returns A unique run ID string in readable date format + */ +export function getUniqueTimeId(): string { + return sortableReadableDateString( + Math.floor(performance.timeOrigin + performance.now()), + ); +} + +/** + * Generates a unique process/thread ID. + * This ID uniquely identifies a process/thread execution and prevents race conditions when running + * the same plugin for multiple projects in parallel. + * Format: timeId-pid-threadId + * Example: "20240101-120000-000-12345-1" + * + * @returns A unique ID string combining timestamp, process ID, and thread ID + */ +export function getUniqueProcessThreadId(): string { + return `${getUniqueTimeId()}-${process.pid}-${threadId}`; +} + +/** + * Generates a unique instance ID based on performance time origin, process ID, thread ID, and instance count. + * This ID uniquely identifies an instance across processes and threads. 
+ * Format: timestamp.pid.threadId.counter + * Example: "20240101-120000-000.12345.1.1" + * + * @param counter - Counter that provides the next instance count value + * @returns A unique ID string combining timestamp, process ID, thread ID, and counter + */ +export function getUniqueInstanceId(counter: Counter): string { + return `${getUniqueTimeId()}.${process.pid}.${threadId}.${counter.next()}`; +} + +/** + * Converts a timestamp in milliseconds to a sortable, human-readable date string. + * Format: yyyymmdd-hhmmss-ms + * Example: "20240101-120000-000" + * + * @param timestampMs - Timestamp in milliseconds + * @returns A sortable date string in yyyymmdd-hhmmss-ms format + */ +export function sortableReadableDateString(timestampMs: number): string { + const date = new Date(timestampMs); + const MILLISECONDS_PER_SECOND = 1000; + const yyyy = date.getFullYear(); + const mm = String(date.getMonth() + 1).padStart(2, '0'); + const dd = String(date.getDate()).padStart(2, '0'); + const hh = String(date.getHours()).padStart(2, '0'); + const min = String(date.getMinutes()).padStart(2, '0'); + const ss = String(date.getSeconds()).padStart(2, '0'); + // eslint-disable-next-line @typescript-eslint/no-magic-numbers + const ms = String(timestampMs % MILLISECONDS_PER_SECOND).padStart(3, '0'); + + return `${yyyy}${mm}${dd}-${hh}${min}${ss}-${ms}`; +} diff --git a/packages/utils/src/lib/process-id.unit.test.ts b/packages/utils/src/lib/process-id.unit.test.ts new file mode 100644 index 000000000..61c64968d --- /dev/null +++ b/packages/utils/src/lib/process-id.unit.test.ts @@ -0,0 +1,173 @@ +import { threadId } from 'node:worker_threads'; +import { + type Counter, + ID_PATTERNS, + TIME_ID_BASE, + getUniqueInstanceId, + getUniqueProcessThreadId, + getUniqueTimeId, + sortableReadableDateString, +} from './process-id.js'; + +describe('TIME_ID_BASE', () => { + it.each([ + '20231114-221320-000', + '20240101-120000-000', + '20231231-235959-999', + ])('should match valid time ID format: %s', timeId => { + expect(timeId).toMatch(TIME_ID_BASE); + }); + + it.each(['2023-11-14', '20231114', '20231114-221320', 'invalid'])( + 'should not match invalid time ID format: %s', + timeId => { + expect(timeId).not.toMatch(TIME_ID_BASE); + }, + ); +}); + +describe('ID_PATTERNS', () => { + it.each(['20231114-221320-000', '20240101-120000-000'])( + 'TIME_ID should match valid time ID: %s', + timeId => { + expect(timeId).toMatch(ID_PATTERNS.TIME_ID); + }, + ); + + it.each(['20231114-221320-000.123', '20231114-221320'])( + 'TIME_ID should not match invalid format: %s', + timeId => { + expect(timeId).not.toMatch(ID_PATTERNS.TIME_ID); + }, + ); + + it('GROUP_ID should match valid group ID', () => { + const groupId = '20231114-221320-000'; + expect(groupId).toMatch(ID_PATTERNS.GROUP_ID); + }); + + it.each(['20231114-221320-000-12345-1', '20240101-120000-000-99999-99'])( + 'PROCESS_THREAD_ID should match valid process/thread ID: %s', + processThreadId => { + expect(processThreadId).toMatch(ID_PATTERNS.PROCESS_THREAD_ID); + }, + ); + + it.each(['20231114-221320-000', '20231114-221320-000-12345'])( + 'PROCESS_THREAD_ID should not match invalid format: %s', + processThreadId => { + expect(processThreadId).not.toMatch(ID_PATTERNS.PROCESS_THREAD_ID); + }, + ); + + it.each(['20231114-221320-000.12345.1.1', '20240101-120000-000.99999.99.42'])( + 'INSTANCE_ID should match valid instance ID: %s', + instanceId => { + expect(instanceId).toMatch(ID_PATTERNS.INSTANCE_ID); + }, + ); + + it.each(['20231114-221320-000', '20231114-221320-000-12345-1'])( 
+ 'INSTANCE_ID should not match invalid format: %s', + instanceId => { + expect(instanceId).not.toMatch(ID_PATTERNS.INSTANCE_ID); + }, + ); +}); + +describe('sortableReadableDateString', () => { + it('should format timestamp correctly', () => { + const timestamp = 1_700_000_000_000; // 2023-11-14 22:13:20.000 + const result = sortableReadableDateString(timestamp); + expect(result).toBe('20231114-221320-000'); + expect(result).toMatch(TIME_ID_BASE); + }); +}); + +describe('getUniqueTimeId', () => { + it('should generate time ID with mocked timeOrigin', () => { + const result = getUniqueTimeId(); + + expect(result).toMatch(ID_PATTERNS.TIME_ID); + expect(result).toMatch(ID_PATTERNS.GROUP_ID); + expect(result).toBe('20231114-221320-000'); + }); + + it('should generate new ID on each call (not idempotent)', () => { + let callCount = 0; + vi.spyOn(performance, 'now').mockImplementation(() => callCount++); + + const id1 = getUniqueTimeId(); + const id2 = getUniqueTimeId(); + + expect(id1).not.toBe(id2); + expect(id1).toMatch(ID_PATTERNS.TIME_ID); + expect(id2).toMatch(ID_PATTERNS.TIME_ID); + }); +}); + +describe('getUniqueProcessThreadId', () => { + it('should generate process/thread ID with correct format', () => { + const result = getUniqueProcessThreadId(); + + expect(result).toMatch(ID_PATTERNS.PROCESS_THREAD_ID); + expect(result).toContain(`-10001-${threadId}`); + expect(result).toStartWith('20231114-221320-000'); + }); + + it('should generate new ID on each call (not idempotent)', () => { + let callCount = 0; + vi.spyOn(performance, 'now').mockImplementation(() => callCount++); + + const id1 = getUniqueProcessThreadId(); + const id2 = getUniqueProcessThreadId(); + + expect(id1).not.toBe(id2); + expect(id1).toMatch(ID_PATTERNS.PROCESS_THREAD_ID); + expect(id2).toMatch(ID_PATTERNS.PROCESS_THREAD_ID); + }); +}); + +describe('getUniqueInstanceId', () => { + it('should generate instance ID with correct format', () => { + let counter = 0; + const counterObj: Counter = { + next: () => ++counter, + }; + + const result = getUniqueInstanceId(counterObj); + + expect(result).toMatch(ID_PATTERNS.INSTANCE_ID); + expect(result).toStartWith('20231114-221320-000.'); + expect(result).toContain(`.10001.${threadId}.`); + expect(result).toEndWith('.1'); + }); + + it('should use counter to generate incrementing instance IDs', () => { + let counter = 0; + const counterObj: Counter = { + next: () => ++counter, + }; + + const results = [ + getUniqueInstanceId(counterObj), + getUniqueInstanceId(counterObj), + getUniqueInstanceId(counterObj), + ]; + + expect(results[0]).toEndWith('.1'); + expect(results[1]).toEndWith('.2'); + expect(results[2]).toEndWith('.3'); + }); + + it('should generate different IDs for different calls', () => { + let counter = 0; + const counterObj: Counter = { + next: () => ++counter, + }; + + expect(getUniqueInstanceId(counterObj)).not.toBe( + getUniqueInstanceId(counterObj), + ); + }); +}); diff --git a/packages/utils/src/lib/profiler/__snapshots__/comprehensive-stats-trace-events.jsonl b/packages/utils/src/lib/profiler/__snapshots__/comprehensive-stats-trace-events.jsonl index 5583ed827..717dfddfa 100644 --- a/packages/utils/src/lib/profiler/__snapshots__/comprehensive-stats-trace-events.jsonl +++ b/packages/utils/src/lib/profiler/__snapshots__/comprehensive-stats-trace-events.jsonl @@ -1,8 +1,8 @@ 
-{"cat":"blink.user_timing","ph":"i","name":"stats-profiler:operation-1:start","pid":10001,"tid":1,"ts":1700000005000000,"args":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}} -{"cat":"blink.user_timing","ph":"b","name":"stats-profiler:operation-1","id2":{"local":"0x1"},"pid":10001,"tid":1,"ts":1700000005000001,"args":{"data":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}}} -{"cat":"blink.user_timing","ph":"e","name":"stats-profiler:operation-1","id2":{"local":"0x1"},"pid":10001,"tid":1,"ts":1700000005000002,"args":{"data":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}}} -{"cat":"blink.user_timing","ph":"i","name":"stats-profiler:operation-1:end","pid":10001,"tid":1,"ts":1700000005000003,"args":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}} -{"cat":"blink.user_timing","ph":"i","name":"stats-profiler:operation-2:start","pid":10001,"tid":1,"ts":1700000005000004,"args":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}} -{"cat":"blink.user_timing","ph":"b","name":"stats-profiler:operation-2","id2":{"local":"0x2"},"pid":10001,"tid":1,"ts":1700000005000005,"args":{"data":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}}} -{"cat":"blink.user_timing","ph":"e","name":"stats-profiler:operation-2","id2":{"local":"0x2"},"pid":10001,"tid":1,"ts":1700000005000006,"args":{"data":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}}} -{"cat":"blink.user_timing","ph":"i","name":"stats-profiler:operation-2:end","pid":10001,"tid":1,"ts":1700000005000007,"args":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000000,"name":"stats-profiler:operation-1:start","ph":"I","args":{"data":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000001,"name":"stats-profiler:operation-1","ph":"b","id2":{"local":"0x1"},"args":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000002,"name":"stats-profiler:operation-1","ph":"e","id2":{"local":"0x1"},"args":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000003,"name":"stats-profiler:operation-1:end","ph":"I","args":{"data":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000004,"name":"stats-profiler:operation-2:start","ph":"I","args":{"data":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000005,"name":"stats-profiler:operation-2","ph":"b","id2":{"local":"0x2"},"args":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000006,"name":"stats-profiler:operation-2","ph":"e","id2":{"local":"0x2"},"args":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000007,"name":"stats-profiler:operation-2:end","ph":"I","args":{"data":{"detail":"{\"devtools\":{\"track\":\"Stats\",\"dataType\":\"track-entry\"}}"}}} diff --git 
a/packages/utils/src/lib/profiler/__snapshots__/custom-tracks-trace-events.jsonl b/packages/utils/src/lib/profiler/__snapshots__/custom-tracks-trace-events.jsonl index 43f83dbdb..4773d8e48 100644 --- a/packages/utils/src/lib/profiler/__snapshots__/custom-tracks-trace-events.jsonl +++ b/packages/utils/src/lib/profiler/__snapshots__/custom-tracks-trace-events.jsonl @@ -1,4 +1,4 @@ -{"cat":"blink.user_timing","ph":"i","name":"api-server:user-lookup:start","pid":10001,"tid":1,"ts":1700000005000000,"args":{"detail":"{\"devtools\":{\"track\":\"cache\",\"dataType\":\"track-entry\"}}"}} -{"cat":"blink.user_timing","ph":"b","name":"api-server:user-lookup","id2":{"local":"0x1"},"pid":10001,"tid":1,"ts":1700000005000001,"args":{"data":{"detail":"{\"devtools\":{\"track\":\"cache\",\"dataType\":\"track-entry\"}}"}}} -{"cat":"blink.user_timing","ph":"e","name":"api-server:user-lookup","id2":{"local":"0x1"},"pid":10001,"tid":1,"ts":1700000005000002,"args":{"data":{"detail":"{\"devtools\":{\"track\":\"cache\",\"dataType\":\"track-entry\"}}"}}} -{"cat":"blink.user_timing","ph":"i","name":"api-server:user-lookup:end","pid":10001,"tid":1,"ts":1700000005000003,"args":{"detail":"{\"devtools\":{\"track\":\"cache\",\"dataType\":\"track-entry\"}}"}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000000,"name":"api-server:user-lookup:start","ph":"I","args":{"data":{"detail":"{\"devtools\":{\"track\":\"cache\",\"dataType\":\"track-entry\"}}"}}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000001,"name":"api-server:user-lookup","ph":"b","id2":{"local":"0x1"},"args":{"detail":"{\"devtools\":{\"track\":\"cache\",\"dataType\":\"track-entry\"}}"}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000002,"name":"api-server:user-lookup","ph":"e","id2":{"local":"0x1"},"args":{"detail":"{\"devtools\":{\"track\":\"cache\",\"dataType\":\"track-entry\"}}"}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000003,"name":"api-server:user-lookup:end","ph":"I","args":{"data":{"detail":"{\"devtools\":{\"track\":\"cache\",\"dataType\":\"track-entry\"}}"}}} diff --git a/packages/utils/src/lib/profiler/__snapshots__/sharded-path-trace-events.jsonl b/packages/utils/src/lib/profiler/__snapshots__/sharded-path-trace-events.jsonl index 2a30bcd0a..00b2e36d8 100644 --- a/packages/utils/src/lib/profiler/__snapshots__/sharded-path-trace-events.jsonl +++ b/packages/utils/src/lib/profiler/__snapshots__/sharded-path-trace-events.jsonl @@ -1,4 +1,4 @@ -{"cat":"blink.user_timing","ph":"i","name":"write-test:test-operation:start","pid":10001,"tid":1,"ts":1700000005000000,"args":{"detail":"{\"devtools\":{\"track\":\"Test\",\"dataType\":\"track-entry\"}}"}} -{"cat":"blink.user_timing","ph":"b","name":"write-test:test-operation","id2":{"local":"0x1"},"pid":10001,"tid":1,"ts":1700000005000001,"args":{"data":{"detail":"{\"devtools\":{\"track\":\"Test\",\"dataType\":\"track-entry\"}}"}}} -{"cat":"blink.user_timing","ph":"e","name":"write-test:test-operation","id2":{"local":"0x1"},"pid":10001,"tid":1,"ts":1700000005000002,"args":{"data":{"detail":"{\"devtools\":{\"track\":\"Test\",\"dataType\":\"track-entry\"}}"}}} -{"cat":"blink.user_timing","ph":"i","name":"write-test:test-operation:end","pid":10001,"tid":1,"ts":1700000005000003,"args":{"detail":"{\"devtools\":{\"track\":\"Test\",\"dataType\":\"track-entry\"}}"}} 
+{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000000,"name":"write-test:test-operation:start","ph":"I","args":{"data":{"detail":"{\"devtools\":{\"track\":\"Test\",\"dataType\":\"track-entry\"}}"}}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000001,"name":"write-test:test-operation","ph":"b","id2":{"local":"0x1"},"args":{"detail":"{\"devtools\":{\"track\":\"Test\",\"dataType\":\"track-entry\"}}"}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000002,"name":"write-test:test-operation","ph":"e","id2":{"local":"0x1"},"args":{"detail":"{\"devtools\":{\"track\":\"Test\",\"dataType\":\"track-entry\"}}"}} +{"cat":"blink.user_timing","pid":10001,"tid":1,"ts":1700000005000003,"name":"write-test:test-operation:end","ph":"I","args":{"data":{"detail":"{\"devtools\":{\"track\":\"Test\",\"dataType\":\"track-entry\"}}"}}} diff --git a/packages/utils/src/lib/profiler/constants.ts b/packages/utils/src/lib/profiler/constants.ts index c0e515787..0cb78b7ae 100644 --- a/packages/utils/src/lib/profiler/constants.ts +++ b/packages/utils/src/lib/profiler/constants.ts @@ -25,3 +25,9 @@ export const PROFILER_DEBUG_ENV_VAR = 'CP_PROFILER_DEBUG'; */ export const SHARDED_WAL_COORDINATOR_ID_ENV_VAR = 'CP_SHARDED_WAL_COORDINATOR_ID'; + +/** + * Default base name for WAL files. + * Used as the base name for sharded WAL files (e.g., "trace" in "trace.json"). + */ +export const PROFILER_PERSIST_BASENAME = 'trace'; diff --git a/packages/utils/src/lib/profiler/profiler-node.int.test.ts b/packages/utils/src/lib/profiler/profiler-node.int.test.ts index 1b903ee5a..e5267118e 100644 --- a/packages/utils/src/lib/profiler/profiler-node.int.test.ts +++ b/packages/utils/src/lib/profiler/profiler-node.int.test.ts @@ -5,16 +5,16 @@ import { omitTraceJson, } from '@code-pushup/test-utils'; import type { PerformanceEntryEncoder } from '../performance-observer.js'; -import { WAL_ID_PATTERNS } from '../wal.js'; +import { ID_PATTERNS } from '../process-id.js'; import { NodejsProfiler } from './profiler-node.js'; import { entryToTraceEvents } from './trace-file-utils.js'; -import type { UserTimingTraceEvent } from './trace-file.type.js'; +import type { TraceEvent } from './trace-file.type.js'; describe('NodeJS Profiler Integration', () => { - const traceEventEncoder: PerformanceEntryEncoder = + const traceEventEncoder: PerformanceEntryEncoder = entryToTraceEvents; - let nodejsProfiler: NodejsProfiler; + let nodejsProfiler: NodejsProfiler; beforeEach(() => { performance.clearMarks(); @@ -261,7 +261,7 @@ describe('NodeJS Profiler Integration', () => { const groupIdDir = pathParts.at(-2); const fileName = pathParts.at(-1); - expect(groupIdDir).toMatch(WAL_ID_PATTERNS.GROUP_ID); + expect(groupIdDir).toMatch(ID_PATTERNS.TIME_ID); expect(fileName).toMatch(/^trace\.\d{8}-\d{6}-\d{3}(?:\.\d+){3}\.jsonl$/); const groupIdDirPath = path.dirname(filePath); @@ -283,7 +283,7 @@ describe('NodeJS Profiler Integration', () => { const dirPath = path.dirname(filePath); const groupId = path.basename(dirPath); - expect(groupId).toMatch(WAL_ID_PATTERNS.GROUP_ID); + expect(groupId).toMatch(ID_PATTERNS.TIME_ID); // eslint-disable-next-line n/no-sync expect(fs.existsSync(dirPath)).toBeTrue(); // eslint-disable-next-line n/no-sync diff --git a/packages/utils/src/lib/profiler/profiler-node.ts b/packages/utils/src/lib/profiler/profiler-node.ts index b668b2028..3ab87b642 100644 --- a/packages/utils/src/lib/profiler/profiler-node.ts +++ b/packages/utils/src/lib/profiler/profiler-node.ts @@ -5,6 +5,11 @@ import { type 
PerformanceObserverOptions, PerformanceObserverSink, } from '../performance-observer.js'; +import { + type Counter, + getUniqueInstanceId, + getUniqueTimeId, +} from '../process-id.js'; import { objectToEntries } from '../transform.js'; import { errorToMarkerPayload } from '../user-timing-extensibility-api-utils.js'; import type { @@ -14,8 +19,6 @@ import type { import { type AppendableSink, WriteAheadLogFile, - getShardId, - getShardedGroupId, getShardedPath, } from '../wal.js'; import { @@ -81,6 +84,13 @@ export class NodejsProfiler< #state: 'idle' | 'running' | 'closed' = 'idle'; #debug: boolean; #unsubscribeExitHandlers: (() => void) | undefined; + #shardCounter: Counter = { + next: (() => { + // eslint-disable-next-line functional/no-let + let count = 0; + return () => ++count; + })(), + }; /** * Creates a NodejsProfiler instance. @@ -110,8 +120,8 @@ export class NodejsProfiler< process.cwd(), getShardedPath({ dir: 'tmp/profiles', - groupId: getShardedGroupId(), - shardId: getShardId(), + groupId: getUniqueTimeId(), + shardId: getUniqueInstanceId(this.#shardCounter), format: walFormat, }), ), diff --git a/packages/utils/src/lib/profiler/profiler-node.unit.test.ts b/packages/utils/src/lib/profiler/profiler-node.unit.test.ts index 5357adc37..02c82f173 100644 --- a/packages/utils/src/lib/profiler/profiler-node.unit.test.ts +++ b/packages/utils/src/lib/profiler/profiler-node.unit.test.ts @@ -106,7 +106,7 @@ describe('NodejsProfiler', () => { expect(typeof profiler.measureAsync).toBe('function'); expect(typeof profiler.marker).toBe('function'); expect(typeof profiler.close).toBe('function'); - expect(typeof profiler.state).toBe('string'); + expect(profiler.state).toBeString(); expect(typeof profiler.setEnabled).toBe('function'); }); @@ -406,11 +406,11 @@ describe('NodejsProfiler', () => { const { profiler } = getNodejsProfiler({ enabled: true }); const error = new Error('Async test error'); - await expect(async () => { - await profiler.measureAsync('async-error-test', async () => { + await expect( + profiler.measureAsync('async-error-test', async () => { throw error; - }); - }).rejects.toThrow(error); + }), + ).rejects.toThrow(error); }); it('should skip measurement when profiler is not active', () => { diff --git a/packages/utils/src/lib/profiler/trace-file-utils.ts b/packages/utils/src/lib/profiler/trace-file-utils.ts index 1061062d3..038c35b73 100644 --- a/packages/utils/src/lib/profiler/trace-file-utils.ts +++ b/packages/utils/src/lib/profiler/trace-file-utils.ts @@ -1,3 +1,4 @@ +import os from 'node:os'; import type { PerformanceEntry, PerformanceMark, @@ -5,24 +6,18 @@ import type { } from 'node:perf_hooks'; import { threadId } from 'node:worker_threads'; import { defaultClock } from '../clock-epoch.js'; -import type { UserTimingDetail } from '../user-timing-extensibility-api.type.js'; import type { - BeginEvent, - CompleteEvent, - EndEvent, - InstantEvent, - InstantEventArgs, - InstantEventTracingStartedInBrowser, - SpanEvent, - SpanEventArgs, TraceEvent, TraceEventContainer, - TraceEventRaw, TraceMetadata, - UserTimingTraceEvent, + TracingStartedInBrowserOptions, } from './trace-file.type.js'; -/** Global counter for generating unique span IDs within a trace */ +/** + * Trace-local monotonic span id counter. + * Chrome only requires uniqueness within a single trace file. + * Resetting per trace is intentional - we're not aiming for global uniqueness. 
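+ * For example, the first two spans written to a trace get ids 0x1 and 0x2 (as in the snapshot files above); a new process starts again at 0x1.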
+ */
 // eslint-disable-next-line functional/no-let
 let id2Count = 0;
@@ -33,80 +28,98 @@ let id2Count = 0;
 export const nextId2 = () => ({ local: `0x${++id2Count}` });
 
 /**
- * Provides default values for trace event properties.
- * @param opt - Optional overrides for process ID, thread ID, and timestamp
- * @param opt.pid - Process ID override, defaults to current process PID
- * @param opt.tid - Thread ID override, defaults to current thread ID
- * @param opt.ts - Timestamp override in microseconds, defaults to current epoch time
- * @returns Object containing pid, tid, and ts with defaults applied
- */
-const defaults = (opt?: { pid?: number; tid?: number; ts?: number }) => ({
-  pid: opt?.pid ?? process.pid,
-  tid: opt?.tid ?? threadId,
-  ts: opt?.ts ?? defaultClock.epochNowUs(),
-});
-
-/**
- * Generates a unique frame tree node ID from process and thread IDs.
+ * Generates a frame tree node ID from process and thread IDs.
  * @param pid - Process ID
  * @param tid - Thread ID
- * @returns Combined numeric ID
+ * @returns Frame tree node ID as a number
  */
 export const frameTreeNodeId = (pid: number, tid: number) =>
   Number.parseInt(`${pid}0${tid}`, 10);
 
 /**
- * Generates a frame name string from process and thread IDs.
+ * Generates a frame name from process and thread IDs.
  * @param pid - Process ID
  * @param tid - Thread ID
- * @returns Formatted frame name
+ * @returns Frame name string in format FRAME0P{pid}T{tid}
  */
 export const frameName = (pid: number, tid: number) => `FRAME0P${pid}T${tid}`;
 
 /**
- * Creates an instant trace event for marking a point in time.
- * @param opt - Event configuration options
- * @returns InstantEvent object
+ * Core factory for creating trace events with defaults.
+ * @param opt - Partial trace event with required name and ph
+ * @returns Complete TraceEvent with defaults applied
  */
-export const getInstantEvent = (opt: {
-  name: string;
-  ts?: number;
-  pid?: number;
-  tid?: number;
-  args?: InstantEventArgs;
-}): InstantEvent => ({
-  cat: 'blink.user_timing',
-  ph: 'i',
-  name: opt.name,
-  ...defaults(opt),
-  args: opt.args ?? {},
+const baseEvent = (
+  opt: Partial<TraceEvent> & { name: string; ph: string },
+): TraceEvent => ({
+  cat: opt.cat ?? 'blink.user_timing',
+  pid: opt.pid ?? process.pid,
+  tid: opt.tid ?? threadId,
+  ts: opt.ts ?? defaultClock.epochNowUs(),
+  ...opt,
 });
+/**
+ * Creates an instant trace event for marking a point in time.
+ * @param name - Event name
+ * @param ts - Optional timestamp in microseconds
+ * @param opt - Optional event configuration
+ * @returns Instant trace event (ph: 'I')
+ */
+export const instant = (
+  name: string,
+  ts?: number,
+  opt?: Partial<TraceEvent>,
+): TraceEvent =>
+  baseEvent({ name, ph: 'I', ...(ts !== undefined && { ts }), ...opt });
+
+/**
+ * Creates a pair of begin and end span events.
+ * @param name - Span name
+ * @param tsB - Begin timestamp in microseconds
+ * @param tsE - End timestamp in microseconds
+ * @param opt - Optional event configuration
+ * @param opt.tsMarkerPadding - Padding to apply to timestamps (default: 1)
+ * @returns Array of [begin event, end event]
+ */
+export const span = (
+  name: string,
+  tsB: number,
+  tsE: number,
+  opt?: Partial<TraceEvent> & { tsMarkerPadding?: number },
+): TraceEvent[] => {
+  const id2 = opt?.id2 ?? nextId2();
+  const pad = opt?.tsMarkerPadding ?? 1;
+  const { tsMarkerPadding, ...eventOpt } = opt ?? {};
+  const args = eventOpt.args ?? {};
+  return [
+    baseEvent({ name, ph: 'b', ts: tsB + pad, id2, ...eventOpt, args }),
+    baseEvent({ name, ph: 'e', ts: tsE - pad, id2, ...eventOpt, args }),
+  ];
+};
+
 /**
  * Creates a start tracing event with frame information.
  * This event is needed at the beginning of the traceEvents array to tell the UI that profiling has started and that it should visualize the data.
  * @param opt - Tracing configuration options
  * @returns StartTracingEvent object
  */
-export const getInstantEventTracingStartedInBrowser = (opt: {
-  url: string;
-  ts?: number;
-  pid?: number;
-  tid?: number;
-}): InstantEventTracingStartedInBrowser => {
-  const { pid, tid, ts } = defaults(opt);
-  const id = frameTreeNodeId(pid, tid);
+export const getInstantEventTracingStartedInBrowser = (
+  opt: TracingStartedInBrowserOptions,
+): TraceEvent => {
+  const pid = opt.pid ?? process.pid;
+  const tid = opt.tid ?? threadId;
+  const ts = opt.ts ?? defaultClock.epochNowUs();
   return {
     cat: 'devtools.timeline',
-    ph: 'i',
+    ph: 'I',
     name: 'TracingStartedInBrowser',
     pid,
     tid,
     ts,
     args: {
       data: {
-        frameTreeNodeId: id,
+        frameTreeNodeId: frameTreeNodeId(pid, tid),
        frames: [
          {
            frame: frameName(pid, tid),
@@ -118,157 +131,78 @@ export const getInstantEventTracingStartedInBrowser = (opt: {
          },
        ],
        persistentIds: true,
-      },
+      } satisfies Record<string, unknown>,
     },
   };
 };
 
 /**
  * Creates a complete trace event with duration.
- * @param opt - Event configuration with name and duration
- * @returns CompleteEvent object
- */
-export const getCompleteEvent = (opt: {
-  name: string;
-  dur: number;
-  ts?: number;
-  pid?: number;
-  tid?: number;
-}): CompleteEvent => ({
-  cat: 'devtools.timeline',
-  ph: 'X',
-  name: opt.name,
-  dur: opt.dur,
-  ...defaults(opt),
-  args: {},
-});
-
-/** Options for creating span events */
-type SpanOpt = {
-  name: string;
-  id2: { local: string };
-  ts?: number;
-  pid?: number;
-  tid?: number;
-  args?: SpanEventArgs;
-};
-
-/**
- * Creates a begin span event.
- * @param ph - Phase ('b' for begin)
- * @param opt - Span event options
- * @returns BeginEvent object
- */
-export function getSpanEvent(ph: 'b', opt: SpanOpt): BeginEvent;
 /**
- * Creates an end span event.
- * @param ph - Phase ('e' for end)
- * @param opt - Span event options
- * @returns EndEvent object
+ * @param name - Event name
+ * @param dur - Duration in microseconds
+ * @param opt - Optional event configuration
+ * @returns Complete trace event (ph: 'X')
  */
-export function getSpanEvent(ph: 'e', opt: SpanOpt): EndEvent;
-/**
- * Creates a span event (begin or end).
- * @param ph - Phase ('b' or 'e')
- * @param opt - Span event options
- * @returns SpanEvent object
- */
-export function getSpanEvent(ph: 'b' | 'e', opt: SpanOpt): SpanEvent {
-  return {
-    cat: 'blink.user_timing',
-    ph,
-    name: opt.name,
-    id2: opt.id2,
-    ...defaults(opt),
-    args: opt.args?.data?.detail
-      ? { data: { detail: opt.args.data.detail } }
-      : {},
-  };
-}
-
-/**
- * Creates a pair of begin and end span events.
- * @param opt - Span configuration with start/end timestamps
- * @returns Tuple of BeginEvent and EndEvent
- */
-export const getSpan = (opt: {
-  name: string;
-  tsB: number;
-  tsE: number;
-  id2?: { local: string };
-  pid?: number;
-  tid?: number;
-  args?: SpanEventArgs;
-  tsMarkerPadding?: number;
-}): [BeginEvent, EndEvent] => {
-  // tsMarkerPadding is here to make the measure slightly smaller so the markers align perfectly.
-  // Otherwise, the marker is visible at the start of the measure below the frame
-  // No padding Padding
-  // spans: ======== |======|
-  // marks: | |
-  const pad = opt.tsMarkerPadding ?? 1;
-  // b|e need to share the same id2
-  const id2 = opt.id2 ?? nextId2();
-
-  return [
-    getSpanEvent('b', {
-      ...opt,
-      id2,
-      ts: opt.tsB + pad,
-    }),
-    getSpanEvent('e', {
-      ...opt,
-      id2,
-      ts: opt.tsE - pad,
-    }),
-  ];
-};
+export const complete = (
+  name: string,
+  dur: number,
+  opt?: Partial<TraceEvent>,
+): TraceEvent =>
+  baseEvent({
+    cat: 'devtools.timeline',
+    ph: 'X',
+    name,
+    dur,
+    args: {},
+    ...opt,
+  });
 
 /**
  * Converts a PerformanceMark to an instant trace event.
  * @param entry - Performance mark entry
  * @param opt - Optional overrides for name, pid, and tid
- * @returns InstantEvent object
+ * @returns Instant trace event
  */
 export const markToInstantEvent = (
   entry: PerformanceMark,
   opt?: { name?: string; pid?: number; tid?: number },
-): InstantEvent =>
-  getInstantEvent({
-    ...opt,
-    name: opt?.name ?? entry.name,
-    ts: defaultClock.fromEntry(entry),
-    args: entry.detail ? { detail: entry.detail } : undefined,
-  });
+): TraceEvent =>
+  instant(
    opt?.name ?? entry.name,
    defaultClock.fromEntry(entry),
    entry.detail
      ? { args: { data: { detail: entry.detail } }, ...opt }
      : { args: {}, ...opt },
  );
 
 /**
  * Converts a PerformanceMeasure to a pair of span events.
  * @param entry - Performance measure entry
  * @param opt - Optional overrides for name, pid, and tid
- * @returns Tuple of BeginEvent and EndEvent
+ * @returns Array of [begin event, end event]
  */
 export const measureToSpanEvents = (
   entry: PerformanceMeasure,
   opt?: { name?: string; pid?: number; tid?: number },
-): [BeginEvent, EndEvent] =>
-  getSpan({
-    ...opt,
-    name: opt?.name ?? entry.name,
-    tsB: defaultClock.fromEntry(entry),
-    tsE: defaultClock.fromEntry(entry, true),
-    args: entry.detail ? { data: { detail: entry.detail } } : undefined,
-  });
+): TraceEvent[] =>
  span(
    opt?.name ?? entry.name,
    defaultClock.fromEntry(entry),
    defaultClock.fromEntry(entry, true),
    {
      ...opt,
      args: entry.detail ? { detail: entry.detail } : {},
    },
  );
 
 /**
- * Converts a PerformanceEntry to an array of UserTimingTraceEvents.
+ * Converts a PerformanceEntry to an array of trace events.
  * A mark is converted to an instant event, and a measure is converted to a pair of span events.
  * Other entry types are ignored.
  * @param entry - Performance entry
- * @returns UserTimingTraceEvent[]
+ * @returns Array of trace events
  */
-export function entryToTraceEvents(
-  entry: PerformanceEntry,
-): UserTimingTraceEvent[] {
+export function entryToTraceEvents(entry: PerformanceEntry): TraceEvent[] {
   if (entry.entryType === 'mark') {
     return [markToInstantEvent(entry as PerformanceMark)];
   }
@@ -278,21 +212,88 @@ export function entryToTraceEvents(
   return [];
 }
 
+/**
+ * Creates a mapper function for transforming detail properties in args.
+ * @param fn - Transformation function to apply to detail values
+ * @returns Function that maps args object
+ */
+const mapArgs = (fn: (v: unknown) => unknown) => (args?: TraceEvent['args']) =>
+  args && {
+    ...args,
+    ...(args.detail != null && { detail: fn(args.detail) }),
+    ...(args.data?.detail != null && {
+      data: { ...args.data, detail: fn(args.data.detail) },
+    }),
+  };
+
+/**
+ * Encodes a trace event by converting object details to JSON strings.
+ * @param e - Trace event with potentially object details + * @returns Trace event with string-encoded details + */ +export const encodeEvent = (e: TraceEvent): TraceEvent => { + const mappedArgs = mapArgs(d => + typeof d === 'object' ? JSON.stringify(d) : d, + )(e.args); + return { + ...e, + ...(mappedArgs && { args: mappedArgs }), + }; +}; + +/** + * Decodes a trace event by parsing JSON string details back to objects. + * @param e - Trace event with potentially string-encoded details + * @returns Trace event with decoded object details + */ +export const decodeEvent = (e: TraceEvent): TraceEvent => { + const mappedArgs = mapArgs(d => (typeof d === 'string' ? JSON.parse(d) : d))( + e.args, + ); + return { + ...e, + ...(mappedArgs && { args: mappedArgs }), + }; +}; + +/** + * Serializes a trace event to a JSON string for storage. + * First encodes the event structure (converting object details to JSON strings), + * then stringifies the entire event. + * @param event - Trace event to serialize + * @returns JSON string representation of the encoded trace event + */ +export const serializeTraceEvent = (event: TraceEvent): string => + JSON.stringify(encodeEvent(event)); + +/** + * Deserializes a JSON string back to a trace event. + * First parses the JSON string, then decodes the event structure + * (parsing JSON string details back to objects). + * @param json - JSON string representation of a trace event + * @returns Decoded trace event + */ +export const deserializeTraceEvent = (json: string): TraceEvent => + decodeEvent(JSON.parse(json)); + /** * Creates trace metadata object with standard DevTools fields and custom metadata. * @param startDate - Optional start date for the trace, defaults to current date * @param metadata - Optional additional metadata to merge into the trace metadata * @returns TraceMetadata object with source, startTime, and merged custom metadata */ -export function getTraceMetadata( - startDate?: Date, - metadata?: Record, -) { +export function getTraceMetadata({ + startDate, + ...metadata +}: Partial = {}): TraceMetadata { + const parsedStartDate = ( + startDate instanceof Date ? startDate : new Date() + ).toISOString(); return { source: 'DevTools', - startTime: startDate?.toISOString() ?? new Date().toISOString(), - hardwareConcurrency: 1, + startTime: parsedStartDate, dataOrigin: 'TraceEvents', + hardwareConcurrency: os.availableParallelism(), ...metadata, }; } @@ -302,121 +303,11 @@ export function getTraceMetadata( * @param opt - Trace file configuration * @returns TraceEventContainer with events and metadata */ -export const getTraceFile = (opt: { +export const createTraceFile = (opt: { traceEvents: TraceEvent[]; - startTime?: string; metadata?: Partial; }): TraceEventContainer => ({ - traceEvents: opt.traceEvents, + traceEvents: opt.traceEvents.map(encodeEvent), displayTimeUnit: 'ms', - metadata: getTraceMetadata( - opt.startTime ? new Date(opt.startTime) : new Date(), - opt.metadata, - ), + metadata: getTraceMetadata(opt.metadata), }); - -/** - * Processes the detail property of an object using a custom processor function. 
- * @template T - Object type that may contain a detail property - * @param target - Object containing the detail property to process - * @param processor - Function to transform the detail value - * @returns New object with processed detail property, or original object if no detail - */ -function processDetail( - target: T, - processor: (detail: string | object) => string | object, -): T { - if ( - target.detail != null && - (typeof target.detail === 'string' || typeof target.detail === 'object') - ) { - return { ...target, detail: processor(target.detail) }; - } - return target; -} - -/** - * Decodes a JSON string detail property back to its original object form. - * @param target - Object containing a detail property as a JSON string - * @returns UserTimingDetail with the detail property parsed from JSON - */ -export function decodeDetail(target: { detail: string }): UserTimingDetail { - return processDetail(target, detail => - typeof detail === 'string' - ? (JSON.parse(detail) as string | object) - : detail, - ) as UserTimingDetail; -} - -/** - * Encodes object detail properties to JSON strings for storage/transmission. - * @param target - UserTimingDetail object with detail property to encode - * @returns UserTimingDetail with object details converted to JSON strings - */ -export function encodeDetail(target: UserTimingDetail): UserTimingDetail { - return processDetail( - target as UserTimingDetail & { detail?: unknown }, - (detail: string | object) => - typeof detail === 'object' - ? JSON.stringify(detail as UserTimingDetail) - : detail, - ) as UserTimingDetail; -} - -/** - * Decodes a raw trace event with JSON string details back to typed UserTimingTraceEvent. - * Parses detail properties from JSON strings to objects. - * @param event - Raw trace event with string-encoded details - * @returns UserTimingTraceEvent with parsed detail objects - */ -export function decodeTraceEvent({ - args, - ...rest -}: TraceEventRaw): UserTimingTraceEvent { - if (!args) { - return rest as UserTimingTraceEvent; - } - - const processedArgs = decodeDetail(args as { detail: string }); - if ('data' in args && args.data && typeof args.data === 'object') { - // eslint-disable-next-line @typescript-eslint/consistent-type-assertions - return { - ...rest, - args: { - ...processedArgs, - data: decodeDetail(args.data as { detail: string }), - }, - } as UserTimingTraceEvent; - } - // eslint-disable-next-line @typescript-eslint/consistent-type-assertions - return { ...rest, args: processedArgs } as UserTimingTraceEvent; -} - -/** - * Encodes a UserTimingTraceEvent to raw format with JSON string details. - * Converts object details to JSON strings for storage/transmission. 
- * @param event - UserTimingTraceEvent with object details - * @returns TraceEventRaw with string-encoded details - */ -export function encodeTraceEvent({ - args, - ...rest -}: UserTimingTraceEvent): TraceEventRaw { - if (!args) { - return rest as TraceEventRaw; - } - - const processedArgs = encodeDetail(args as UserTimingDetail); - if ('data' in args && args.data && typeof args.data === 'object') { - const result: TraceEventRaw = { - ...rest, - args: { - ...processedArgs, - data: encodeDetail(args.data as UserTimingDetail), - }, - }; - return result; - } - const result: TraceEventRaw = { ...rest, args: processedArgs }; - return result; -} diff --git a/packages/utils/src/lib/profiler/trace-file-utils.unit.test.ts b/packages/utils/src/lib/profiler/trace-file-utils.unit.test.ts index aa21887af..93cd89665 100644 --- a/packages/utils/src/lib/profiler/trace-file-utils.unit.test.ts +++ b/packages/utils/src/lib/profiler/trace-file-utils.unit.test.ts @@ -1,27 +1,24 @@ import type { PerformanceMark, PerformanceMeasure } from 'node:perf_hooks'; import { - decodeDetail, - decodeTraceEvent, - encodeDetail, - encodeTraceEvent, + complete, + createTraceFile, + decodeEvent, + deserializeTraceEvent, + encodeEvent, entryToTraceEvents, - frameName, - frameTreeNodeId, - getCompleteEvent, - getInstantEvent, getInstantEventTracingStartedInBrowser, - getSpan, - getSpanEvent, - getTraceFile, getTraceMetadata, + instant, markToInstantEvent, measureToSpanEvents, nextId2, + serializeTraceEvent, + span, } from './trace-file-utils.js'; describe('getTraceFile', () => { it('should create trace file with empty events array', () => { - expect(getTraceFile({ traceEvents: [] })).toStrictEqual({ + expect(createTraceFile({ traceEvents: [] })).toStrictEqual({ traceEvents: [], displayTimeUnit: 'ms', metadata: { @@ -35,11 +32,9 @@ describe('getTraceFile', () => { it('should create trace file with events', () => { expect( - getTraceFile({ + createTraceFile({ traceEvents: [ - getInstantEvent({ - name: 'test-event', - ts: 1_234_567_890, + instant('test-event', 1_234_567_890, { pid: 123, tid: 456, }), @@ -65,9 +60,11 @@ describe('getTraceFile', () => { }); it('should use custom startTime when provided', () => { - const result = getTraceFile({ + const result = createTraceFile({ traceEvents: [], - startTime: '2023-01-01T00:00:00.000Z', + metadata: { + startTime: '2023-01-01T00:00:00.000Z', + }, }); expect(result).toHaveProperty( @@ -79,7 +76,7 @@ describe('getTraceFile', () => { }); it('should include hardware concurrency', () => { - expect(getTraceFile({ traceEvents: [] })).toHaveProperty( + expect(createTraceFile({ traceEvents: [] })).toHaveProperty( 'metadata', expect.objectContaining({ hardwareConcurrency: expect.any(Number), @@ -88,33 +85,13 @@ describe('getTraceFile', () => { }); }); -describe('frameTreeNodeId', () => { - it.each([ - [123, 456, 1_230_456], - [1, 2, 102], - [999, 999, 9_990_999], - ])('should generate correct frame tree node ID', (pid, tid, expected) => { - expect(frameTreeNodeId(pid, tid)).toBe(expected); - }); -}); - -describe('frameName', () => { - it.each([ - [123, 456], - [1, 2], - [999, 999], - ])('should generate correct frame name', (pid, tid) => { - expect(frameName(pid, tid)).toBe(`FRAME0P${pid}T${tid}`); - }); -}); - describe('getInstantEventTracingStartedInBrowser', () => { it('should create start tracing event with required url', () => { expect( getInstantEventTracingStartedInBrowser({ url: 'https://example.com' }), ).toStrictEqual({ cat: 'devtools.timeline', - ph: 'i', + ph: 'I', name: 
'TracingStartedInBrowser', pid: expect.any(Number), tid: expect.any(Number), @@ -147,7 +124,7 @@ describe('getInstantEventTracingStartedInBrowser', () => { }), ).toStrictEqual({ cat: 'devtools.timeline', - ph: 'i', + ph: 'I', name: 'TracingStartedInBrowser', pid: 777, tid: 888, @@ -172,14 +149,9 @@ describe('getInstantEventTracingStartedInBrowser', () => { }); }); -describe('getCompleteEvent', () => { +describe('complete', () => { it('should create complete event with required fields', () => { - expect( - getCompleteEvent({ - name: 'test-complete', - dur: 1000, - }), - ).toStrictEqual({ + expect(complete('test-complete', 1000)).toStrictEqual({ cat: 'devtools.timeline', ph: 'X', name: 'test-complete', @@ -193,9 +165,7 @@ describe('getCompleteEvent', () => { it('should use custom pid, tid, and ts', () => { expect( - getCompleteEvent({ - name: 'custom-complete', - dur: 500, + complete('custom-complete', 500, { pid: 111, tid: 222, ts: 1_234_567_890, @@ -223,12 +193,12 @@ describe('markToInstantEvent', () => { } as PerformanceMark), ).toStrictEqual({ cat: 'blink.user_timing', - ph: 'i', + ph: 'I', name: 'test-mark', pid: expect.any(Number), tid: expect.any(Number), ts: expect.any(Number), - args: { detail: { customData: 'test' } }, + args: { data: { detail: { customData: 'test' } } }, }); }); @@ -241,7 +211,7 @@ describe('markToInstantEvent', () => { } as PerformanceMark), ).toStrictEqual({ cat: 'blink.user_timing', - ph: 'i', + ph: 'I', name: 'test-mark', pid: expect.any(Number), tid: expect.any(Number), @@ -266,12 +236,12 @@ describe('markToInstantEvent', () => { ), ).toStrictEqual({ cat: 'blink.user_timing', - ph: 'i', + ph: 'I', name: 'custom-name', pid: 999, tid: 888, ts: expect.any(Number), - args: { detail: { customData: 'test' } }, + args: { data: { detail: { customData: 'test' } } }, }); }); }); @@ -294,7 +264,7 @@ describe('measureToSpanEvents', () => { tid: expect.any(Number), ts: expect.any(Number), id2: { local: expect.stringMatching(/^0x\d+$/) }, - args: { data: { detail: { measurement: 'data' } } }, + args: { detail: { measurement: 'data' } }, }, { cat: 'blink.user_timing', @@ -304,7 +274,7 @@ describe('measureToSpanEvents', () => { tid: expect.any(Number), ts: expect.any(Number), id2: { local: expect.stringMatching(/^0x\d+$/) }, - args: { data: { detail: { measurement: 'data' } } }, + args: { detail: { measurement: 'data' } }, }, ]); }); @@ -361,63 +331,21 @@ describe('measureToSpanEvents', () => { name: 'custom-measure', pid: 777, tid: 666, - args: { data: { detail: { measurement: 'data' } } }, + args: { detail: { measurement: 'data' } }, }), expect.objectContaining({ name: 'custom-measure', pid: 777, tid: 666, - args: { data: { detail: { measurement: 'data' } } }, + args: { detail: { measurement: 'data' } }, }), ]); }); }); -describe('getSpanEvent', () => { - it('should create begin event with args detail', () => { - expect( - getSpanEvent('b', { - name: 'test-span', - id2: { local: '0x1' }, - args: { data: { detail: { customData: 'test' } as any } }, - }), - ).toStrictEqual({ - cat: 'blink.user_timing', - ph: 'b', - name: 'test-span', - pid: expect.any(Number), - tid: expect.any(Number), - ts: expect.any(Number), - id2: { local: '0x1' }, - args: { data: { detail: { customData: 'test' } } }, - }); - }); - - it('should create end event without args detail', () => { - expect( - getSpanEvent('e', { - name: 'test-span', - id2: { local: '0x2' }, - }), - ).toStrictEqual({ - cat: 'blink.user_timing', - ph: 'e', - name: 'test-span', - pid: expect.any(Number), - tid: 
expect.any(Number), - ts: expect.any(Number), - id2: { local: '0x2' }, - args: {}, - }); - }); -}); - -describe('getSpan', () => { +describe('span', () => { it('should create span events with custom tsMarkerPadding', () => { - const result = getSpan({ - name: 'test-span', - tsB: 1000, - tsE: 1500, + const result = span('test-span', 1000, 1500, { tsMarkerPadding: 5, args: {}, }); @@ -447,23 +375,16 @@ describe('getSpan', () => { }); it('should generate id2 when not provided', () => { - const result = getSpan({ - name: 'test-span', - tsB: 1000, - tsE: 1500, - }); + const result = span('test-span', 1000, 1500); expect(result).toHaveLength(2); - expect(result[0].id2?.local).toMatch(/^0x\d+$/); - expect(result[1].id2).toEqual(result[0].id2); + expect(result.at(0)?.id2?.local).toMatch(/^0x\d+$/); + expect(result.at(1)?.id2).toEqual(result.at(0)?.id2); }); it('should use provided id2', () => { expect( - getSpan({ - name: 'test-span', - tsB: 1000, - tsE: 1500, + span('test-span', 1000, 1500, { id2: { local: 'custom-id' }, }), ).toStrictEqual([ @@ -529,12 +450,12 @@ describe('entryToTraceEvents', () => { expect(result).toHaveLength(1); expect(result[0]).toStrictEqual({ cat: 'blink.user_timing', - ph: 'i', + ph: 'I', name: 'test-mark', pid: expect.any(Number), tid: expect.any(Number), ts: expect.any(Number), - args: { detail: { customData: 'test' } }, + args: { data: { detail: { customData: 'test' } } }, }); }); @@ -559,7 +480,7 @@ describe('entryToTraceEvents', () => { tid: expect.any(Number), ts: expect.any(Number), id2: { local: expect.stringMatching(/^0x\d+$/) }, - args: { data: { detail: { measurement: 'data' } } }, + args: { detail: { measurement: 'data' } }, }); expect(result[1]).toStrictEqual({ cat: 'blink.user_timing', @@ -569,7 +490,7 @@ describe('entryToTraceEvents', () => { tid: expect.any(Number), ts: expect.any(Number), id2: { local: expect.stringMatching(/^0x\d+$/) }, - args: { data: { detail: { measurement: 'data' } } }, + args: { detail: { measurement: 'data' } }, }); }); @@ -601,14 +522,14 @@ describe('getTraceMetadata', () => { it('should use provided startDate', () => { const startDate = new Date('2023-01-01T00:00:00.000Z'); - const metadata = getTraceMetadata(startDate); + const metadata = getTraceMetadata({ startDate }); expect(metadata.startTime).toBe('2023-01-01T00:00:00.000Z'); }); it('should merge custom metadata', () => { const customMetadata = { version: '1.0.0', platform: 'node' }; - const metadata = getTraceMetadata(undefined, customMetadata); + const metadata = getTraceMetadata(customMetadata); expect(metadata).toStrictEqual({ source: 'DevTools', @@ -621,116 +542,134 @@ describe('getTraceMetadata', () => { }); }); -describe('decodeDetail', () => { - it('should decode string detail back to object', () => { - const input = { detail: '{"key": "value"}' }; - const result = decodeDetail(input); +describe('decodeEvent', () => { + it('should decode trace event with string details', () => { + const encodedEvent = { + cat: 'blink.user_timing', + ph: 'I', + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + args: { + detail: '{"custom": "data"}', + data: { detail: '{"nested": "value"}' }, + }, + }; + + const result = decodeEvent(encodedEvent); expect(result).toStrictEqual({ - detail: { key: 'value' }, + cat: 'blink.user_timing', + ph: 'I', + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + args: { + detail: { custom: 'data' }, + data: { detail: { nested: 'value' } }, + }, }); }); - it('should return object detail unchanged', () => { - const input = { detail: { 
key: 'value' } }; - const result = decodeDetail(input); - - expect(result).toStrictEqual(input); - }); - - it('should return input unchanged when detail is not string or object', () => { - const input = { detail: 123 }; - const result = decodeDetail(input as any); - - expect(result).toStrictEqual(input); - }); - - it('should return input unchanged when no detail property', () => { - const input = { other: 'value' }; - const result = decodeDetail(input as any); - - expect(result).toStrictEqual(input); - }); -}); + it('should handle trace event without args', () => { + const encodedEvent = { + cat: 'blink.user_timing', + ph: 'I', + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + }; -describe('encodeDetail', () => { - it('should encode object detail to JSON string', () => { - const input = { detail: { key: 'value' } }; - const result = encodeDetail(input); + const result = decodeEvent(encodedEvent); expect(result).toStrictEqual({ - detail: '{"key":"value"}', + cat: 'blink.user_timing', + ph: 'I', + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, }); }); - it('should return string detail unchanged', () => { - const input = { detail: 'already a string' }; - const result = encodeDetail(input); - - expect(result).toStrictEqual(input); - }); - - it('should return input unchanged when detail is not string or object', () => { - const input = { detail: 123 }; - const result = encodeDetail(input as any); - - expect(result).toStrictEqual(input); - }); + it('should handle args without data property', () => { + const encodedEvent = { + cat: 'blink.user_timing', + ph: 'I', + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + args: { + detail: '{"custom": "data"}', + }, + }; - it('should return input unchanged when no detail property', () => { - const input = { other: 'value' }; - const result = encodeDetail(input as any); + const result = decodeEvent(encodedEvent); - expect(result).toStrictEqual(input); + expect(result).toStrictEqual({ + cat: 'blink.user_timing', + ph: 'I', + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + args: { + detail: { custom: 'data' }, + }, + }); }); }); -describe('decodeTraceEvent', () => { - it('should decode trace event with string details', () => { - const rawEvent = { - cat: 'blink.user_timing' as const, - ph: 'i' as const, +describe('encodeEvent', () => { + it('should encode trace event with object details', () => { + const event = { + cat: 'blink.user_timing', + ph: 'I', name: 'test-event', pid: 123, tid: 456, ts: 1000, args: { - detail: '{"custom": "data"}', - data: { detail: '{"nested": "value"}' }, + detail: { custom: 'data' }, + data: { detail: { nested: 'value' } }, }, }; - const result = decodeTraceEvent(rawEvent); + const result = encodeEvent(event); expect(result).toStrictEqual({ cat: 'blink.user_timing', - ph: 'i', + ph: 'I', name: 'test-event', pid: 123, tid: 456, ts: 1000, args: { - detail: { custom: 'data' }, - data: { detail: { nested: 'value' } }, + detail: '{"custom":"data"}', + data: { detail: '{"nested":"value"}' }, }, }); }); it('should handle trace event without args', () => { - const rawEvent = { - cat: 'blink.user_timing' as const, - ph: 'i' as const, + const event = { + cat: 'blink.user_timing', + ph: 'I', name: 'test-event', pid: 123, tid: 456, ts: 1000, }; - const result = decodeTraceEvent(rawEvent); + const result = encodeEvent(event); expect(result).toStrictEqual({ cat: 'blink.user_timing', - ph: 'i', + ph: 'I', name: 'test-event', pid: 123, tid: 456, @@ -739,80 +678,81 @@ describe('decodeTraceEvent', () => { }); 
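+
+  // Illustrative extra case: details that are already strings pass through encodeEvent unchanged (mapArgs only stringifies objects).
+  it('should leave string details unchanged', () => {
+    const event = {
+      cat: 'blink.user_timing',
+      ph: 'I',
+      name: 'test-event',
+      pid: 123,
+      tid: 456,
+      ts: 1000,
+      args: { detail: '{"custom":"data"}' },
+    };
+
+    expect(encodeEvent(event)).toStrictEqual(event);
+  });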
it('should handle args without data property', () => { - const rawEvent = { - cat: 'blink.user_timing' as const, - ph: 'i' as const, + const event = { + cat: 'blink.user_timing', + ph: 'I', name: 'test-event', pid: 123, tid: 456, ts: 1000, args: { - detail: '{"custom": "data"}', + detail: { custom: 'data' }, }, }; - const result = decodeTraceEvent(rawEvent); + const result = encodeEvent(event); expect(result).toStrictEqual({ cat: 'blink.user_timing', - ph: 'i', + ph: 'I', name: 'test-event', pid: 123, tid: 456, ts: 1000, args: { - detail: { custom: 'data' }, + detail: '{"custom":"data"}', }, }); }); }); -describe('encodeTraceEvent', () => { - it('should encode trace event with object details', () => { +describe('serializeTraceEvent', () => { + it('should serialize trace event to JSON string', () => { const event = { - cat: 'blink.user_timing' as const, - ph: 'i' as const, + cat: 'blink.user_timing', + ph: 'I', name: 'test-event', pid: 123, tid: 456, ts: 1000, args: { detail: { custom: 'data' }, - data: { detail: { nested: 'value' } }, }, }; - const result = encodeTraceEvent(event); + const result = serializeTraceEvent(event); - expect(result).toStrictEqual({ + expect(result).toBeString(); + const parsed = JSON.parse(result); + expect(parsed).toStrictEqual({ cat: 'blink.user_timing', - ph: 'i', + ph: 'I', name: 'test-event', pid: 123, tid: 456, ts: 1000, args: { detail: '{"custom":"data"}', - data: { detail: '{"nested":"value"}' }, }, }); }); it('should handle trace event without args', () => { const event = { - cat: 'blink.user_timing' as const, - ph: 'i' as const, + cat: 'blink.user_timing', + ph: 'I', name: 'test-event', pid: 123, tid: 456, ts: 1000, }; - const result = encodeTraceEvent(event); + const result = serializeTraceEvent(event); - expect(result).toStrictEqual({ + const parsed = JSON.parse(result); + expect(parsed).toStrictEqual({ cat: 'blink.user_timing', - ph: 'i', + ph: 'I', name: 'test-event', pid: 123, tid: 456, @@ -820,31 +760,107 @@ describe('encodeTraceEvent', () => { }); }); - it('should handle args without data property', () => { + it('should handle nested object details in args', () => { const event = { - cat: 'blink.user_timing' as const, - ph: 'i' as const, + cat: 'blink.user_timing', + ph: 'I', name: 'test-event', pid: 123, tid: 456, ts: 1000, args: { detail: { custom: 'data' }, + data: { detail: { nested: 'value' } }, }, }; - const result = encodeTraceEvent(event); + const result = serializeTraceEvent(event); - expect(result).toStrictEqual({ + const parsed = JSON.parse(result); + expect(parsed.args).toStrictEqual({ + detail: '{"custom":"data"}', + data: { detail: '{"nested":"value"}' }, + }); + }); +}); + +describe('deserializeTraceEvent', () => { + it('should deserialize JSON string back to trace event', () => { + const originalEvent = { cat: 'blink.user_timing', - ph: 'i', + ph: 'I', + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + args: { + detail: { custom: 'data' }, + }, + }; + + const serialized = serializeTraceEvent(originalEvent); + const deserialized = deserializeTraceEvent(serialized); + + expect(deserialized).toStrictEqual(originalEvent); + }); + + it('should handle round-trip serialization', () => { + const originalEvent = { + cat: 'blink.user_timing', + ph: 'I', + name: 'round-trip-test', + pid: 789, + tid: 101, + ts: 987_654_321, + args: { + detail: { custom: 'data', nested: { value: 42 } }, + data: { detail: { nested: 'value' } }, + }, + }; + + const serialized = serializeTraceEvent(originalEvent); + const deserialized = 
deserializeTraceEvent(serialized); + const reSerialized = serializeTraceEvent(deserialized); + const reDeserialized = deserializeTraceEvent(reSerialized); + + expect(reDeserialized).toStrictEqual(originalEvent); + }); + + it('should handle trace event without args', () => { + const originalEvent = { + cat: 'blink.user_timing', + ph: 'I', + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + }; + + const serialized = serializeTraceEvent(originalEvent); + const deserialized = deserializeTraceEvent(serialized); + + expect(deserialized).toStrictEqual(originalEvent); + }); + + it('should decode string-encoded details back to objects', () => { + const jsonString = JSON.stringify({ + cat: 'blink.user_timing', + ph: 'I', name: 'test-event', pid: 123, tid: 456, ts: 1000, args: { detail: '{"custom":"data"}', + data: { detail: '{"nested":"value"}' }, }, }); + + const deserialized = deserializeTraceEvent(jsonString); + + expect(deserialized.args).toStrictEqual({ + detail: { custom: 'data' }, + data: { detail: { nested: 'value' } }, + }); }); }); diff --git a/packages/utils/src/lib/profiler/trace-file.type.ts b/packages/utils/src/lib/profiler/trace-file.type.ts index 839c06884..669173de1 100644 --- a/packages/utils/src/lib/profiler/trace-file.type.ts +++ b/packages/utils/src/lib/profiler/trace-file.type.ts @@ -1,169 +1,31 @@ -import type { UserTimingDetail } from '../user-timing-extensibility-api.type.js'; +import type { + MarkerPayload, + TrackEntryPayload, +} from '../user-timing-extensibility-api.type.js'; -/** - * Arguments for instant trace events. - * @property {UserTimingDetail} [detail] - Optional user timing detail with DevTools payload - */ -export type InstantEventArgs = { - detail?: UserTimingDetail; -} & { [key: string]: unknown }; - -/** - * Arguments for span trace events (begin/end events). - * @property {object} [data] - Optional data object - * @property {UserTimingDetail} [data.detail] - Optional user timing detail with DevTools payload - */ -export type SpanEventArgs = { - data?: { detail?: UserTimingDetail }; -} & { [key: string]: unknown }; - -/** - * Arguments for complete trace events. - * @property {Record} [detail] - Optional detail object with arbitrary properties - */ -export type CompleteEventArgs = { detail?: Record }; - -/** - * Arguments for start tracing events. - * @property {object} data - Tracing initialization data - * @property {number} data.frameTreeNodeId - Frame tree node identifier - * @property {Array} data.frames - Array of frame information - * @property {boolean} data.persistentIds - Whether IDs are persistent - */ -export type InstantEventTracingStartedInBrowserArgs = { - data: { - frameTreeNodeId: number; - frames: { - frame: string; - isInPrimaryMainFrame: boolean; - isOutermostMainFrame: boolean; - name: string; - processId: number; - url: string; - }[]; - persistentIds: boolean; - }; -}; - -/** - * Union type of all possible trace event arguments. - */ -export type TraceArgs = - | InstantEventArgs - | SpanEventArgs - | CompleteEventArgs - | InstantEventTracingStartedInBrowserArgs; +/** DevTools payload type for trace events. */ +export type DevToolsPayload = TrackEntryPayload | MarkerPayload; /** - * Base properties shared by all trace events. 
- * @property {string} cat - Event category - * @property {string} name - Event name - * @property {number} pid - Process ID - * @property {number} tid - Thread ID - * @property {number} ts - Timestamp in epoch microseconds - * @property {TraceArgs} [args] - Optional event arguments + * Unified trace event type for Chrome DevTools trace format. */ -export type BaseTraceEvent = { +export type TraceEvent = { cat: string; + ph: string; name: string; pid: number; tid: number; ts: number; - args: TraceArgs; -}; - -/** - * Start tracing event for Chrome DevTools tracing. - */ -export type InstantEventTracingStartedInBrowser = BaseTraceEvent & { - cat: 'devtools.timeline'; - ph: 'i'; - name: 'TracingStartedInBrowser'; - args: InstantEventTracingStartedInBrowserArgs; -}; - -/** - * Complete trace event with duration. - * Represents a complete operation with start time and duration. - * @property {'X'} ph - Phase indicator for complete events - * @property {number} dur - Duration in microseconds - */ -export type CompleteEvent = BaseTraceEvent & { ph: 'X'; dur: number }; - -/** - * Instant trace event representing a single point in time. - * Used for user timing marks and other instantaneous events. - * @property {'blink.user_timing'} cat - Fixed category for user timing events - * @property {'i'} ph - Phase indicator for instant events - * @property {never} [dur] - Duration is not applicable for instant events - * @property {InstantEventArgs} [args] - Optional event arguments - */ -export type InstantEvent = Omit & { - cat: 'blink.user_timing'; - ph: 'i'; - dur?: never; - args: InstantEventArgs; -}; - -/** - * Core properties for span trace events (begin/end pairs). - * @property {object} id2 - Span identifier - * @property {string} id2.local - Local span ID (unique to the process, same for b and e events) - * @property {SpanEventArgs} [args] - Optional event arguments - */ -type SpanCore = Omit & { - id2: { local: string }; - args: SpanEventArgs; -}; -/** - * Begin event for a span (paired with an end event). - * @property {'b'} ph - Phase indicator for begin events - * @property {never} [dur] - Duration is not applicable for begin events - */ -export type BeginEvent = SpanCore & { - ph: 'b'; - dur?: never; + dur?: number; + id2?: { local: string }; + args?: { + detail?: unknown; + data?: { detail?: unknown; [key: string]: unknown }; + devtools?: DevToolsPayload; + [key: string]: unknown; + }; }; -/** - * End event for a span (paired with a begin event). - * @property {'e'} ph - Phase indicator for end events - * @property {never} [dur] - Duration is not applicable for end events - */ -export type EndEvent = SpanCore & { ph: 'e'; dur?: never }; - -/** - * Union type for span events (begin or end). - */ -export type SpanEvent = BeginEvent | EndEvent; - -/** - * Union type of all trace event types. - */ -export type UserTimingTraceEvent = InstantEvent | SpanEvent; - -/** - * All trace events including system events added during finalization. - */ -export type TraceEvent = - | UserTimingTraceEvent - | CompleteEvent - | InstantEventTracingStartedInBrowser; - -/** - * Raw arguments format for trace events before processing. - * Either contains a detail string directly or nested in a data object. - */ -type RawArgs = - | { detail?: string; [key: string]: unknown } - | { data?: { detail?: string }; [key: string]: unknown }; - -/** - * Raw trace event format before type conversion. - * Similar to TraceEvent but with unprocessed arguments. 
- */ -export type TraceEventRaw = Omit & { args: RawArgs }; - /** * Time window bounds (min, max) in trace time units (e.g. microseconds). * @property {number} min - Minimum timestamp in the window @@ -293,4 +155,12 @@ export type TraceEventContainer = { /** * Trace file format - either an array of events or a structured container. */ -export type TraceFile = TraceEvent[] | TraceEventContainer; +export type TraceFile = TraceEventContainer; + +/** Options for creating a tracing started in browser event. */ +export type TracingStartedInBrowserOptions = { + url: string; + ts?: number; + pid?: number; + tid?: number; +}; diff --git a/packages/utils/src/lib/profiler/wal-json-trace.ts b/packages/utils/src/lib/profiler/wal-json-trace.ts index fcdfec4b1..a0690d8f1 100644 --- a/packages/utils/src/lib/profiler/wal-json-trace.ts +++ b/packages/utils/src/lib/profiler/wal-json-trace.ts @@ -1,13 +1,14 @@ import { defaultClock } from '../clock-epoch.js'; import type { InvalidEntry, WalFormat } from '../wal.js'; +import { PROFILER_PERSIST_BASENAME } from './constants.js'; import { - decodeTraceEvent, - encodeTraceEvent, - getCompleteEvent, + complete, + createTraceFile, + deserializeTraceEvent, getInstantEventTracingStartedInBrowser, - getTraceFile, + serializeTraceEvent, } from './trace-file-utils.js'; -import type { TraceEvent, UserTimingTraceEvent } from './trace-file.type.js'; +import type { TraceEvent, TraceMetadata } from './trace-file.type.js'; /** Name for the trace start margin event */ const TRACE_START_MARGIN_NAME = '[trace padding start]'; @@ -18,86 +19,68 @@ const TRACE_MARGIN_US = 1_000_000; /** Duration in microseconds for margin events (20ms = 20,000μs) */ const TRACE_MARGIN_DURATION_US = 20_000; -/** - * Generates a complete Chrome DevTools trace file content as JSON string. - * Adds margin events around the trace events and includes metadata. - * @param events - Array of user timing trace events to include - * @param metadata - Optional custom metadata to include in the trace file - * @returns JSON string representation of the complete trace file - */ export function generateTraceContent( - events: UserTimingTraceEvent[], - metadata?: Record, + events: TraceEvent[], + metadata?: Partial, ): string { - const traceContainer = getTraceFile({ - traceEvents: events, - startTime: new Date().toISOString(), - metadata: { - ...metadata, - generatedAt: new Date().toISOString(), - }, - }); - - const marginUs = TRACE_MARGIN_US; - const marginDurUs = TRACE_MARGIN_DURATION_US; - - const sortedEvents = [...events].sort((a, b) => a.ts - b.ts); const fallbackTs = defaultClock.epochNowUs(); - const firstTs: number = sortedEvents.at(0)?.ts ?? fallbackTs; - const lastTs: number = sortedEvents.at(-1)?.ts ?? fallbackTs; + const sortedEvents = + events.length > 0 ? [...events].sort((a, b) => a.ts - b.ts) : []; - const startTs = firstTs - marginUs; - const endTs = lastTs + marginUs; + const firstTs = sortedEvents.at(0)?.ts ?? fallbackTs; + const lastTs = sortedEvents.at(-1)?.ts ?? fallbackTs; - const traceEvents: TraceEvent[] = [ - getInstantEventTracingStartedInBrowser({ - ts: startTs, - url: events.length === 0 ? 
'empty-trace' : 'generated-trace', - }), - getCompleteEvent({ - name: TRACE_START_MARGIN_NAME, - ts: startTs, - dur: marginDurUs, + return JSON.stringify( + createTraceFile({ + traceEvents: [ + getInstantEventTracingStartedInBrowser({ + ts: firstTs - TRACE_MARGIN_US, + // TODO: add the stringifies command of the process that was traced when sharded WAL is implemented in profiler + url: events.length > 0 ? 'generated-trace' : 'empty-trace', + }), + complete(TRACE_START_MARGIN_NAME, TRACE_MARGIN_DURATION_US, { + ts: firstTs - TRACE_MARGIN_US, + }), + ...sortedEvents, + complete(TRACE_END_MARGIN_NAME, TRACE_MARGIN_DURATION_US, { + ts: lastTs + TRACE_MARGIN_US, + }), + ], + metadata: { + ...metadata, + startTime: new Date(firstTs / 1000).toISOString(), + }, }), - ...sortedEvents, - getCompleteEvent({ - name: TRACE_END_MARGIN_NAME, - ts: endTs, - dur: marginDurUs, - }), - ]; - - return JSON.stringify({ ...traceContainer, traceEvents }); + ); } +/** + * Codec for encoding and decoding trace events. + * Encodes nested objects in args.detail and args.data.detail to JSON strings for storage. + */ +export const traceEventCodec = { + encode: serializeTraceEvent, + decode: deserializeTraceEvent, +}; + /** * Creates a WAL (Write-Ahead Logging) format configuration for Chrome DevTools trace files. * Automatically finalizes shards into complete trace files with proper metadata and margin events. * @returns WalFormat configuration object with baseName, codec, extensions, and finalizer */ export function traceEventWalFormat() { - const baseName = 'trace'; - const walExtension = '.jsonl'; - const finalExtension = '.json'; return { - baseName, - walExtension, - finalExtension, - codec: { - encode: (event: UserTimingTraceEvent) => - JSON.stringify(encodeTraceEvent(event)), - decode: (json: string) => - decodeTraceEvent(JSON.parse(json)) as UserTimingTraceEvent, - }, + baseName: PROFILER_PERSIST_BASENAME, + walExtension: '.jsonl', + finalExtension: '.json', + codec: traceEventCodec, finalizer: ( - records: (UserTimingTraceEvent | InvalidEntry)[], + records: (TraceEvent | InvalidEntry)[], metadata?: Record, - ) => { - const validRecords = records.filter( - (r): r is UserTimingTraceEvent => - !(typeof r === 'object' && r != null && '__invalid' in r), - ); - return generateTraceContent(validRecords, metadata); - }, - } satisfies WalFormat; + ) => + generateTraceContent( + records.filter((r): r is TraceEvent => !('__invalid' in (r as object))), + metadata, + ), + } satisfies WalFormat; } diff --git a/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts b/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts index 3f40293ec..8b25465f7 100644 --- a/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts +++ b/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts @@ -1,10 +1,224 @@ -import type { UserTimingTraceEvent } from './trace-file.type.js'; -import { generateTraceContent, traceEventWalFormat } from './wal-json-trace.js'; +import type { TraceEvent } from './trace-file.type.js'; +import { + generateTraceContent, + traceEventCodec, + traceEventWalFormat, +} from './wal-json-trace.js'; + +describe('traceEventCodec', () => { + // Memory representation: TraceEvent objects with nested objects in args.detail and args.data.detail + // This is the format we process and hold in memory + const instantEvent: TraceEvent = { + name: 'cp:test-event', + ph: 'I', + ts: 123_456_789, + pid: 123, + tid: 456, + cat: 'blink.user_timing', + args: { + detail: { + custom: 'data', + }, + data: { + detail: { + 
nested: 'value', + }, + }, + devtools: { + dataType: 'track-entry', + track: 'test-track', + color: 'primary', + tooltipText: 'Test event tooltip', + }, + }, + } satisfies TraceEvent; + + const spanBeginEvent: TraceEvent = { + name: 'cp:test-span', + ph: 'b', + ts: 200_000_000, + pid: 123, + tid: 456, + cat: 'blink.user_timing', + id2: { local: '0x1' }, + args: { + devtools: { + dataType: 'track-entry', + track: 'span-track', + color: 'secondary', + tooltipText: 'Test span begin', + }, + }, + } satisfies TraceEvent; + + const spanEndEvent: TraceEvent = { + name: 'cp:test-span', + ph: 'e', + ts: 250_000_000, + pid: 123, + tid: 456, + cat: 'blink.user_timing', + id2: { local: '0x1' }, + args: { + devtools: { + dataType: 'track-entry', + track: 'span-track', + color: 'secondary', + tooltipText: 'Test span end', + }, + }, + } satisfies TraceEvent; + + // Encoded JSON string representation: nested objects in args.detail and args.data.detail are JSON strings + // This is the format stored in WAL files (.jsonl) + const instantEventJsonString = JSON.stringify({ + name: 'cp:test-event', + ph: 'I', + ts: 123_456_789, + pid: 123, + tid: 456, + cat: 'blink.user_timing', + args: { + detail: JSON.stringify({ custom: 'data' }), + data: { + detail: JSON.stringify({ nested: 'value' }), + }, + devtools: { + dataType: 'track-entry', + track: 'test-track', + color: 'primary', + tooltipText: 'Test event tooltip', + }, + }, + }); + + const spanBeginEventJsonString = JSON.stringify({ + name: 'cp:test-span', + ph: 'b', + ts: 200_000_000, + pid: 123, + tid: 456, + cat: 'blink.user_timing', + id2: { local: '0x1' }, + args: { + devtools: { + dataType: 'track-entry', + track: 'span-track', + color: 'secondary', + tooltipText: 'Test span begin', + }, + }, + }); + + const spanEndEventJsonString = JSON.stringify({ + name: 'cp:test-span', + ph: 'e', + ts: 250_000_000, + pid: 123, + tid: 456, + cat: 'blink.user_timing', + id2: { local: '0x1' }, + args: { + devtools: { + dataType: 'track-entry', + track: 'span-track', + color: 'secondary', + tooltipText: 'Test span end', + }, + }, + }); + + describe('decode direction (JSON string → memory object)', () => { + it('should decode instant event from JSON string', () => { + const decoded = traceEventCodec.decode(instantEventJsonString); + expect(decoded).toStrictEqual(instantEvent); + }); + + it('should decode span begin event from JSON string', () => { + const decoded = traceEventCodec.decode(spanBeginEventJsonString); + expect(decoded).toStrictEqual(spanBeginEvent); + }); + + it('should decode span end event from JSON string', () => { + const decoded = traceEventCodec.decode(spanEndEventJsonString); + expect(decoded).toStrictEqual(spanEndEvent); + }); + + it('should decode events with nested detail objects correctly', () => { + const decoded = traceEventCodec.decode(instantEventJsonString); + expect(decoded.args?.detail).toStrictEqual({ custom: 'data' }); + expect(decoded.args?.data?.detail).toStrictEqual({ nested: 'value' }); + }); + }); + + describe('encode direction (memory object → JSON string)', () => { + it('should encode instant event to JSON string', () => { + const encoded = traceEventCodec.encode(instantEvent); + const parsed = JSON.parse(encoded); + expect(parsed.args.detail).toBe(JSON.stringify({ custom: 'data' })); + expect(parsed.args.data.detail).toBe(JSON.stringify({ nested: 'value' })); + }); + + it('should encode span begin event to JSON string', () => { + const encoded = traceEventCodec.encode(spanBeginEvent); + const decoded = 
traceEventCodec.decode(encoded); + expect(decoded).toStrictEqual(spanBeginEvent); + }); + + it('should encode span end event to JSON string', () => { + const encoded = traceEventCodec.encode(spanEndEvent); + const decoded = traceEventCodec.decode(encoded); + expect(decoded).toStrictEqual(spanEndEvent); + }); + + it('should encode nested detail objects as JSON strings', () => { + const encoded = traceEventCodec.encode(instantEvent); + const parsed = JSON.parse(encoded); + expect(parsed.args.detail).toBeString(); + expect(parsed.args.data.detail).toBeString(); + expect(JSON.parse(parsed.args.detail)).toStrictEqual({ custom: 'data' }); + expect(JSON.parse(parsed.args.data.detail)).toStrictEqual({ + nested: 'value', + }); + }); + }); + + describe('round-trip (memory → string → memory)', () => { + it('should maintain consistency for instant event', () => { + const encoded = traceEventCodec.encode(instantEvent); + const decoded = traceEventCodec.decode(encoded); + expect(decoded).toStrictEqual(instantEvent); + }); + + it('should maintain consistency for span begin event', () => { + const encoded = traceEventCodec.encode(spanBeginEvent); + const decoded = traceEventCodec.decode(encoded); + expect(decoded).toStrictEqual(spanBeginEvent); + }); + + it('should maintain consistency for span end event', () => { + const encoded = traceEventCodec.encode(spanEndEvent); + const decoded = traceEventCodec.decode(encoded); + expect(decoded).toStrictEqual(spanEndEvent); + }); + + it('should handle multiple round-trips correctly', () => { + let current = instantEvent; + // eslint-disable-next-line functional/no-loop-statements + for (let i = 0; i < 3; i++) { + const encoded = traceEventCodec.encode(current); + const decoded = traceEventCodec.decode(encoded); + expect(decoded).toStrictEqual(instantEvent); + current = decoded; + } + }); + }); +}); describe('generateTraceContent', () => { it('should generate trace content for empty events array', () => { - const events: UserTimingTraceEvent[] = []; - const metadata = { version: '1.0.0', generatedAt: '2024-01-01T00:00:00Z' }; + const events: TraceEvent[] = []; + const metadata = { version: '1.0.0' }; const result = generateTraceContent(events, metadata); @@ -13,7 +227,7 @@ describe('generateTraceContent', () => { traceEvents: [ expect.objectContaining({ name: 'TracingStartedInBrowser', - ph: 'i', + ph: 'I', cat: 'devtools.timeline', args: { data: expect.objectContaining({ @@ -45,16 +259,15 @@ describe('generateTraceContent', () => { hardwareConcurrency: 1, dataOrigin: 'TraceEvents', version: '1.0.0', - generatedAt: expect.any(String), }, }); }); it('should generate trace content for non-empty events array', () => { - const events: UserTimingTraceEvent[] = [ + const events: TraceEvent[] = [ { name: 'cp:test-operation:start', - ph: 'i', + ph: 'I', ts: 1000, pid: 123, tid: 456, @@ -67,7 +280,7 @@ describe('generateTraceContent', () => { }, { name: 'cp:test-operation:end', - ph: 'i', + ph: 'I', ts: 2000, pid: 123, tid: 456, @@ -87,7 +300,7 @@ describe('generateTraceContent', () => { traceEvents: [ expect.objectContaining({ name: 'TracingStartedInBrowser', - ph: 'i', + ph: 'I', cat: 'devtools.timeline', args: { data: expect.objectContaining({ @@ -119,16 +332,15 @@ describe('generateTraceContent', () => { startTime: expect.any(String), hardwareConcurrency: 1, dataOrigin: 'TraceEvents', - generatedAt: expect.any(String), }, }); }); it('should sort events by timestamp', () => { - const events: UserTimingTraceEvent[] = [ + const events: TraceEvent[] = [ { name: 
'cp:second-operation', - ph: 'i', + ph: 'I', ts: 2000, pid: 123, tid: 456, @@ -137,7 +349,7 @@ describe('generateTraceContent', () => { }, { name: 'cp:first-operation', - ph: 'i', + ph: 'I', ts: 1000, pid: 123, tid: 456, @@ -158,10 +370,10 @@ describe('generateTraceContent', () => { }); it('should handle single event with proper margin calculation', () => { - const events: UserTimingTraceEvent[] = [ + const events: TraceEvent[] = [ { name: 'cp:single-event', - ph: 'i', + ph: 'I', ts: 5000, pid: 123, tid: 456, @@ -240,9 +452,9 @@ describe('traceEventWalFormat', () => { it('should encode and decode trace events correctly', () => { const format = traceEventWalFormat(); - const testEvent: UserTimingTraceEvent = { + const testEvent: TraceEvent = { name: 'cp:test-event', - ph: 'i', + ph: 'I', ts: 123_456_789, pid: 123, tid: 456, @@ -254,18 +466,43 @@ describe('traceEventWalFormat', () => { }; const encoded = format.codec.encode(testEvent); - expect(typeof encoded).toBe('string'); const decoded = format.codec.decode(encoded); expect(decoded).toStrictEqual(testEvent); }); + it('should maintain consistency through decode -> encode -> decode round-trip', () => { + const format = traceEventWalFormat(); + const originalEvent: TraceEvent = { + name: 'cp:round-trip-test', + ph: 'I', + ts: 987_654_321, + pid: 789, + tid: 101, + cat: 'blink.user_timing', + args: { + dataType: 'track-entry', + track: 'Round Trip Track', + trackGroup: 'Test Group', + customField: 'custom value', + }, + }; + + const initialEncoded = format.codec.encode(originalEvent); + const firstDecoded = format.codec.decode(initialEncoded); + const secondEncoded = format.codec.encode(firstDecoded); + const secondDecoded = format.codec.decode(secondEncoded); + + expect(secondDecoded).toStrictEqual(firstDecoded); + expect(secondDecoded).toStrictEqual(originalEvent); + }); + it('should finalize records into trace content', () => { const format = traceEventWalFormat(); - const records: UserTimingTraceEvent[] = [ + const records: TraceEvent[] = [ { name: 'cp:operation:start', - ph: 'i', + ph: 'I', ts: 1000, pid: 123, tid: 456, @@ -276,25 +513,9 @@ describe('traceEventWalFormat', () => { const result = format.finalizer(records); - expect(typeof result).toBe('string'); const parsed = JSON.parse(result); expect(parsed).toHaveProperty('traceEvents'); expect(parsed).toHaveProperty('metadata'); expect(parsed.traceEvents).toBeArray(); }); - - it('should include generatedAt in finalizer metadata', () => { - const format = traceEventWalFormat(); - const records: UserTimingTraceEvent[] = []; - - const result = format.finalizer(records); - const parsed = JSON.parse(result); - - expect(parsed.metadata).toHaveProperty('generatedAt'); - expect(typeof parsed.metadata.generatedAt).toBe('string'); - // Should be recent timestamp - expect(new Date(parsed.metadata.generatedAt).getTime()).toBeGreaterThan( - Date.now() - 10_000, - ); - }); }); diff --git a/packages/utils/src/lib/wal-sharded.int.test.ts b/packages/utils/src/lib/wal-sharded.int.test.ts new file mode 100644 index 000000000..1fa37a36e --- /dev/null +++ b/packages/utils/src/lib/wal-sharded.int.test.ts @@ -0,0 +1,259 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { PROFILER_SHARDER_ID_ENV_VAR } from './profiler/constants.js'; +import { ShardedWal } from './wal-sharded.js'; +import { type WalFormat, type WalRecord, stringCodec } from './wal.js'; + +describe('ShardedWal Integration', () => { + const testDir = 
path.join( + process.cwd(), + 'tmp', + 'int', + 'utils', + 'wal-sharded', + ); + const makeMockFormat = ( + overrides: Partial>, + ): WalFormat => { + const { + baseName = 'wal', + walExtension = '.log', + finalExtension = '.json', + codec = stringCodec(), + finalizer = records => `${JSON.stringify(records)}\n`, + } = overrides; + + return { + baseName, + walExtension, + finalExtension, + codec, + finalizer, + }; + }; + let shardedWal: ShardedWal; + + beforeEach(() => { + if (fs.existsSync(testDir)) { + fs.rmSync(testDir, { recursive: true, force: true }); + } + fs.mkdirSync(testDir, { recursive: true }); + }); + + afterEach(() => { + if (shardedWal) { + shardedWal.cleanupIfCoordinator(); + } + if (fs.existsSync(testDir)) { + fs.rmSync(testDir, { recursive: true, force: true }); + } + }); + + it('should create and finalize shards correctly', () => { + shardedWal = new ShardedWal({ + debug: false, + dir: testDir, + format: makeMockFormat({ + baseName: 'trace', + }), + coordinatorIdEnvVar: PROFILER_SHARDER_ID_ENV_VAR, + groupId: 'create-finalize', + }); + + const shard1 = shardedWal.shard(); + shard1.open(); + shard1.append('record1'); + shard1.append('record2'); + shard1.close(); + + const shard2 = shardedWal.shard(); + shard2.open(); + shard2.append('record3'); + shard2.close(); + + shardedWal.finalize(); + + const finalFile = path.join( + testDir, + shardedWal.groupId, + `trace.create-finalize.json`, + ); + expect(fs.existsSync(finalFile)).toBeTrue(); + + const content = fs.readFileSync(finalFile, 'utf8'); + const records = JSON.parse(content.trim()); + expect(records).toEqual(['record1', 'record2', 'record3']); + }); + + it('should merge multiple shards correctly', () => { + shardedWal = new ShardedWal({ + debug: false, + dir: testDir, + format: makeMockFormat({ + baseName: 'merged', + }), + coordinatorIdEnvVar: PROFILER_SHARDER_ID_ENV_VAR, + groupId: 'merge-shards', + }); + + // eslint-disable-next-line functional/no-loop-statements + for (let i = 1; i <= 5; i++) { + const shard = shardedWal.shard(); + shard.open(); + shard.append(`record-from-shard-${i}`); + shard.close(); + } + + shardedWal.finalize(); + + const finalFile = path.join( + testDir, + shardedWal.groupId, + `merged.merge-shards.json`, + ); + const content = fs.readFileSync(finalFile, 'utf8'); + const records = JSON.parse(content.trim()); + expect(records).toHaveLength(5); + expect(records[0]).toBe('record-from-shard-1'); + expect(records[4]).toBe('record-from-shard-5'); + }); + + it('should handle invalid entries during if debug true', () => { + shardedWal = new ShardedWal({ + debug: true, + dir: testDir, + format: makeMockFormat({ + baseName: 'test', + }), + coordinatorIdEnvVar: PROFILER_SHARDER_ID_ENV_VAR, + groupId: 'invalid-entries', + }); + + const shard = shardedWal.shard(); + shard.open(); + shard.append('valid1'); + shard.append('invalid'); + shard.append('valid2'); + shard.close(); + + shardedWal.finalize(); + // When debug is true, lastRecover should contain recovery results + expect(shardedWal.stats.lastRecover).toHaveLength(1); + expect(shardedWal.stats.lastRecover[0]).toMatchObject({ + file: expect.stringContaining('test.'), + result: expect.objectContaining({ + records: expect.arrayContaining(['valid1', 'invalid', 'valid2']), + errors: [], + partialTail: null, + }), + }); + + const finalFile = path.join( + testDir, + shardedWal.groupId, + `test.invalid-entries.json`, + ); + const content = fs.readFileSync(finalFile, 'utf8'); + const records = JSON.parse(content.trim()); + expect(records).toEqual(['valid1', 
'invalid', 'valid2']); + }); + + it('should cleanup shard files after finalization', () => { + shardedWal = new ShardedWal({ + debug: false, + dir: testDir, + format: makeMockFormat({ + baseName: 'cleanup-test', + }), + coordinatorIdEnvVar: PROFILER_SHARDER_ID_ENV_VAR, + groupId: 'cleanup-test', + }); + + const shard1 = shardedWal.shard(); + shard1.open(); + shard1.append('record1'); + shard1.close(); + + const shard2 = shardedWal.shard(); + shard2.open(); + shard2.append('record2'); + shard2.close(); + + shardedWal.finalize(); + + const finalFile = path.join( + testDir, + shardedWal.groupId, + `cleanup-test.cleanup-test.json`, + ); + expect(fs.existsSync(finalFile)).toBeTrue(); + + shardedWal.cleanupIfCoordinator(); + + const groupDir = path.join(testDir, shardedWal.groupId); + const files = fs.readdirSync(groupDir); + expect(files).not.toContain(expect.stringMatching(/cleanup-test.*\.log$/)); + expect(files).toContain(`cleanup-test.cleanup-test.json`); + }); + + it('should use custom options in finalizer', () => { + shardedWal = new ShardedWal({ + debug: false, + dir: testDir, + format: makeMockFormat({ + baseName: 'custom', + finalizer: (records, opt) => + `${JSON.stringify({ records, metadata: opt })}\n`, + }), + coordinatorIdEnvVar: PROFILER_SHARDER_ID_ENV_VAR, + groupId: 'custom-finalizer', + }); + + const shard = shardedWal.shard(); + shard.open(); + shard.append('record1'); + shard.close(); + + shardedWal.finalize({ version: '2.0', timestamp: Date.now() }); + + const finalFile = path.join( + testDir, + shardedWal.groupId, + `custom.custom-finalizer.json`, + ); + const content = fs.readFileSync(finalFile, 'utf8'); + const result = JSON.parse(content.trim()); + expect(result.records).toEqual(['record1']); + expect(result.metadata).toEqual({ + version: '2.0', + timestamp: expect.any(Number), + }); + }); + + it('should handle empty shards correctly', () => { + shardedWal = new ShardedWal({ + debug: false, + dir: testDir, + format: makeMockFormat({ + baseName: 'empty', + }), + coordinatorIdEnvVar: PROFILER_SHARDER_ID_ENV_VAR, + groupId: 'empty-shards', + }); + + const groupDir = path.join(testDir, shardedWal.groupId); + fs.mkdirSync(groupDir, { recursive: true }); + + shardedWal.finalize(); + + const finalFile = path.join( + testDir, + shardedWal.groupId, + `empty.${shardedWal.groupId}.json`, + ); + expect(fs.existsSync(finalFile)).toBeTrue(); + const content = fs.readFileSync(finalFile, 'utf8'); + expect(content.trim()).toBe('[]'); + }); +}); diff --git a/packages/utils/src/lib/wal-sharded.ts b/packages/utils/src/lib/wal-sharded.ts new file mode 100644 index 000000000..3ea89f3a8 --- /dev/null +++ b/packages/utils/src/lib/wal-sharded.ts @@ -0,0 +1,459 @@ +import * as fs from 'node:fs'; +import path from 'node:path'; +import process from 'node:process'; +import { threadId } from 'node:worker_threads'; +import { extendError } from './errors.js'; +import { + type Counter, + getUniqueInstanceId, + getUniqueTimeId, +} from './process-id.js'; +import { + type InvalidEntry, + type RecoverResult, + type WalFormat, + type WalRecord, + WriteAheadLogFile, + filterValidRecords, +} from './wal.js'; + +/** + * NOTE: this helper is only used in this file. The rest of the repo avoids sync methods so it is not reusable. + * Ensures a directory exists, creating it recursively if necessary using sync methods. 
+ * @param dirPath - The directory path to ensure exists + */ +function ensureDirectoryExistsSync(dirPath: string): void { + if (!fs.existsSync(dirPath)) { + fs.mkdirSync(dirPath, { recursive: true }); + } +} + +/** + * Validates that a groupId is safe to use as a single path segment. + * Rejects path traversal attempts and path separators to prevent writing outside intended directory. + * + * @param groupId - The groupId to validate + * @throws Error if groupId contains unsafe characters or path traversal sequences + */ +function validateGroupId(groupId: string): void { + // Reject empty or whitespace-only groupIds + if (!groupId || groupId.trim().length === 0) { + throw new Error('groupId cannot be empty or whitespace-only'); + } + + // Reject path separators (both forward and backward slashes) + if (groupId.includes('/') || groupId.includes('\\')) { + throw new Error('groupId cannot contain path separators (/ or \\)'); + } + + // Reject relative path components + if (groupId === '..' || groupId === '.') { + throw new Error('groupId cannot be "." or ".."'); + } + + // Reject null bytes which can be used to bypass validation + if (groupId.includes('\0')) { + throw new Error('groupId cannot contain null bytes'); + } + + // Validate that the resolved path stays within the intended directory + // This catches cases where the path library normalizes to a parent directory + const normalized = path.normalize(groupId); + if (normalized !== groupId || normalized.startsWith('..')) { + throw new Error( + `groupId normalization resulted in unsafe path: ${normalized}`, + ); + } +} + +// eslint-disable-next-line functional/no-let +let shardCount = 0; + +/** + * Counter for generating sequential shard IDs. + * Encapsulates the shard count increment logic. + */ +export const ShardedWalCounter: Counter = { + next() { + return ++shardCount; + }, +}; + +/** + * Generates a unique readable instance ID. + * This ID uniquely identifies a shard/file per process/thread combination with a human-readable timestamp. + * Format: readable-timestamp.pid.threadId.counter + * Example: "20240101-120000-000.12345.1.1" + * + * @returns A unique ID string with readable timestamp, process ID, thread ID, and counter + */ +export function getShardId(): string { + return `${getUniqueTimeId()}.${process.pid}.${threadId}.${ShardedWalCounter.next()}`; +} + +/** + * Sharded Write-Ahead Log manager for coordinating multiple WAL shards. + * Handles distributed logging across multiple processes/files with atomic finalization. + */ + +export class ShardedWal { + static instanceCount = 0; + + readonly #id: string = getUniqueInstanceId({ + next() { + return ++ShardedWal.instanceCount; + }, + }); + readonly groupId = getUniqueTimeId(); + readonly #debug: boolean = false; + readonly #format: WalFormat; + readonly #dir: string = process.cwd(); + readonly #coordinatorIdEnvVar: string; + #state: 'active' | 'finalized' | 'cleaned' = 'active'; + #lastRecovery: { + file: string; + result: RecoverResult>; + }[] = []; + #createdShardFiles: string[] = []; + + /** + * Initialize the origin PID environment variable if not already set. + * This must be done as early as possible before any user code runs. + * Sets envVarName to the current process ID if not already defined. 
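+   *
+   * @example
+   * // Minimal usage sketch (variable names assumed): the first instance claims the
+   * // coordinator role; descendant processes inherit the env var and leave it unchanged.
+   * ShardedWal.setCoordinatorProcess('CP_SHARDED_WAL_COORDINATOR_ID', wal.id);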
+ * + * @param envVarName - Environment variable name for storing coordinator ID + * @param profilerID - The profiler ID to set as coordinator + */ + static setCoordinatorProcess(envVarName: string, profilerID: string): void { + if (!process.env[envVarName]) { + process.env[envVarName] = profilerID; + } + } + + /** + * Determines if this process is the leader WAL process using the origin PID heuristic. + * + * The leader is the process that first enabled profiling (the one that set CP_PROFILER_ORIGIN_PID). + * All descendant processes inherit the environment but have different PIDs. + * + * @param envVarName - Environment variable name for storing coordinator ID + * @param profilerID - The profiler ID to check + * @returns true if this is the leader WAL process, false otherwise + */ + static isCoordinatorProcess(envVarName: string, profilerID: string): boolean { + return process.env[envVarName] === profilerID; + } + + /** + * Create a sharded WAL manager. + * + * @param opt.dir - Base directory to store shard files (defaults to process.cwd()) + * @param opt.format - WAL format configuration + * @param opt.groupId - Group ID for sharding (defaults to generated group ID) + * @param opt.coordinatorIdEnvVar - Environment variable name for storing coordinator ID (defaults to CP_SHARDED_WAL_COORDINATOR_ID) + * @param opt.autoCoordinator - Whether to auto-set the coordinator ID on construction (defaults to true) + * @param opt.measureNameEnvVar - Environment variable name for coordinating groupId across processes (optional) + */ + constructor(opt: { + debug?: boolean; + dir?: string; + format: WalFormat; + groupId?: string; + coordinatorIdEnvVar: string; + autoCoordinator?: boolean; + measureNameEnvVar?: string; + }) { + const { + dir, + format, + debug, + groupId, + coordinatorIdEnvVar, + autoCoordinator = true, + measureNameEnvVar, + } = opt; + + if (debug != null) { + this.#debug = debug; + } + + // Determine groupId: use provided, then env var, or generate + // eslint-disable-next-line functional/no-let + let resolvedGroupId: string; + if (groupId != null) { + // User explicitly provided groupId - use it (even if empty, validation will catch it) + resolvedGroupId = groupId; + } else if (measureNameEnvVar && process.env[measureNameEnvVar] != null) { + // Env var is set (by coordinator or previous process) - use it + resolvedGroupId = process.env[measureNameEnvVar]; + } else if (measureNameEnvVar) { + // Env var not set - we're likely the first/coordinator, generate and set it + resolvedGroupId = getUniqueTimeId(); + + process.env[measureNameEnvVar] = resolvedGroupId; + } else { + // No measureNameEnvVar provided - generate unique one (backward compatible) + resolvedGroupId = getUniqueTimeId(); + } + + // Validate groupId for path safety before using it + validateGroupId(resolvedGroupId); + + this.groupId = resolvedGroupId; + + if (dir) { + this.#dir = dir; + } + this.#format = format; + this.#coordinatorIdEnvVar = coordinatorIdEnvVar; + + if (autoCoordinator) { + ShardedWal.setCoordinatorProcess(this.#coordinatorIdEnvVar, this.#id); + } + } + + /** + * Gets the unique instance ID for this ShardedWal. + * + * @returns The unique instance ID + */ + get id(): string { + return this.#id; + } + + /** + * Is this instance the coordinator? + * + * Coordinator status is determined from the coordinatorIdEnvVar environment variable. + * The coordinator handles finalization and cleanup of shard files. + * Checks dynamically to allow coordinator to be set after construction. 
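+   *
+   * @example
+   * // Minimal sketch (variable name assumed); finalizeIfCoordinator() / cleanupIfCoordinator()
+   * // wrap this same check.
+   * if (wal.isCoordinator()) {
+   *   wal.finalize();
+   *   wal.cleanup();
+   * }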
+ * + * @returns true if this instance is the coordinator, false otherwise + */ + isCoordinator(): boolean { + return ShardedWal.isCoordinatorProcess(this.#coordinatorIdEnvVar, this.#id); + } + + /** + * Asserts that the WAL is in 'active' state. + * Throws an error if the WAL has been finalized or cleaned. + * + * @throws Error if WAL is not in 'active' state + */ + private assertActive(): void { + if (this.#state !== 'active') { + throw new Error(`WAL is ${this.#state}, cannot modify`); + } + } + + /** + * Gets the current lifecycle state of the WAL. + * + * @returns Current lifecycle state: 'active', 'finalized', or 'cleaned' + */ + getState(): 'active' | 'finalized' | 'cleaned' { + return this.#state; + } + + /** + * Checks if the WAL has been finalized. + * + * @returns true if WAL is in 'finalized' state, false otherwise + */ + isFinalized(): boolean { + return this.#state === 'finalized'; + } + + /** + * Checks if the WAL has been cleaned. + * + * @returns true if WAL is in 'cleaned' state, false otherwise + */ + isCleaned(): boolean { + return this.#state === 'cleaned'; + } + + /** + * Generates a filename for a shard file using a shard ID. + * Both groupId and shardId are already in readable date format. + * + * Example with baseName "trace" and shardId "20240101-120000-000.12345.1.1": + * Filename: trace.20240101-120000-000.12345.1.1.log + * + * @param shardId - The human-readable shard ID (readable-timestamp.pid.threadId.count format) + * @returns The filename for the shard file + */ + getShardedFileName(shardId: string) { + const { baseName, walExtension } = this.#format; + return `${baseName}.${shardId}${walExtension}`; + } + + /** + * Generates a filename for the final merged output file. + * Uses the groupId as the identifier in the final filename. + * + * Example with baseName "trace" and groupId "20240101-120000-000": + * Filename: trace.20240101-120000-000.json + * + * Example with baseName "trace" and groupId "measureName": + * Filename: trace.measureName.json + * + * @returns The filename for the final merged output file + */ + getFinalFilePath() { + const groupIdDir = path.join(this.#dir, this.groupId); + const { baseName, finalExtension } = this.#format; + + return path.join( + groupIdDir, + `${baseName}.${this.groupId}${finalExtension}`, + ); + } + + shard() { + this.assertActive(); + const filePath = path.join( + this.#dir, + this.groupId, + this.getShardedFileName(getShardId()), + ); + this.#createdShardFiles.push(filePath); + return new WriteAheadLogFile({ + file: filePath, + codec: this.#format.codec, + }); + } + + /** Get all shard file paths matching this WAL's base name */ + private shardFiles() { + if (!fs.existsSync(this.#dir)) { + return []; + } + + const groupDir = path.join(this.#dir, this.groupId); + // create dir if not existing + ensureDirectoryExistsSync(groupDir); + + return fs + .readdirSync(groupDir) + .filter(entry => entry.endsWith(this.#format.walExtension)) + .filter(entry => entry.startsWith(`${this.#format.baseName}`)) + .map(entry => path.join(groupDir, entry)); + } + + /** Get shard file paths created by this instance */ + private getCreatedShardFiles() { + return this.#createdShardFiles.filter(f => fs.existsSync(f)); + } + + /** + * Finalize all shards by merging them into a single output file. + * Recovers all records from all shards, validates no errors, and writes merged result. + * Idempotent: returns early if already finalized or cleaned. 
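+   *
+   * @example
+   * // Minimal sketch (metadata keys assumed): merge all recovered shard records into the
+   * // final file and forward the metadata object to the format's finalizer.
+   * wal.finalize({ version: '1.0.0' });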
+ * @throws Error if custom finalizer method throws + */ + finalize(opt?: Record) { + if (this.#state !== 'active') { + return; + } + + // Ensure base directory exists before calling shardFiles() + ensureDirectoryExistsSync(this.#dir); + + const fileRecoveries = this.shardFiles().map(f => ({ + file: f, + result: new WriteAheadLogFile({ + file: f, + codec: this.#format.codec, + }).recover(), + })); + + const records = fileRecoveries.flatMap(({ result }) => result.records); + + if (this.#debug) { + this.#lastRecovery = fileRecoveries; + } + + ensureDirectoryExistsSync(path.dirname(this.getFinalFilePath())); + + try { + fs.writeFileSync( + this.getFinalFilePath(), + this.#format.finalizer(filterValidRecords(records), opt), + ); + } catch (error) { + throw extendError( + error, + 'Could not finalize sharded wal. Finalizer method in format throws.', + { appendMessage: true }, + ); + } + + this.#state = 'finalized'; + } + + /** + * Cleanup shard files by removing them from disk. + * Coordinator-only: throws error if not coordinator to prevent race conditions. + * Idempotent: returns early if already cleaned. + */ + cleanup() { + if (!this.isCoordinator()) { + throw new Error('cleanup() can only be called by coordinator'); + } + + if (this.#state === 'cleaned') { + return; + } + + this.shardFiles() + .filter(f => fs.existsSync(f)) + .forEach(f => { + fs.unlinkSync(f); + }); + + this.#state = 'cleaned'; + } + + get stats() { + // When finalized, count all shard files from filesystem (for multi-process scenarios) + // Otherwise, count only files created by this instance + const shardFileCount = + this.#state === 'finalized' || this.#state === 'cleaned' + ? this.shardFiles().length + : this.getCreatedShardFiles().length; + const shardFilesList = + this.#state === 'finalized' || this.#state === 'cleaned' + ? this.shardFiles() + : this.getCreatedShardFiles(); + + return { + lastRecover: this.#lastRecovery, + state: this.#state, + groupId: this.groupId, + shardCount: this.getCreatedShardFiles().length, + isCoordinator: this.isCoordinator(), + isFinalized: this.isFinalized(), + isCleaned: this.isCleaned(), + finalFilePath: this.getFinalFilePath(), + shardFileCount, + shardFiles: shardFilesList, + }; + } + + finalizeIfCoordinator(opt?: Record) { + if (this.isCoordinator()) { + this.finalize(opt); + } + } + + /** + * Cleanup shard files if this instance is the coordinator. + * Safe to call from any process - only coordinator will execute cleanup. + */ + cleanupIfCoordinator() { + if (this.isCoordinator()) { + this.cleanup(); + } + } +} diff --git a/packages/utils/src/lib/wal-sharded.unit.test.ts b/packages/utils/src/lib/wal-sharded.unit.test.ts new file mode 100644 index 000000000..455cb12a1 --- /dev/null +++ b/packages/utils/src/lib/wal-sharded.unit.test.ts @@ -0,0 +1,575 @@ +import { vol } from 'memfs'; +import { beforeEach, describe, expect, it } from 'vitest'; +import { MEMFS_VOLUME, osAgnosticPath } from '@code-pushup/test-utils'; +import { getUniqueInstanceId } from './process-id.js'; +import { PROFILER_SHARDER_ID_ENV_VAR } from './profiler/constants.js'; +import { ShardedWal } from './wal-sharded.js'; +import { + type WalFormat, + WriteAheadLogFile, + parseWalFormat, + stringCodec, +} from './wal.js'; + +const read = (p: string) => vol.readFileSync(p, 'utf8') as string; + +const getShardedWal = (overrides?: { + dir?: string; + format?: Partial; + measureNameEnvVar?: string; + autoCoordinator?: boolean; + groupId?: string; +}) => { + const { format, ...rest } = overrides ?? 
{}; + return new ShardedWal({ + debug: false, + dir: '/test/shards', + format: parseWalFormat({ + baseName: 'test-wal', + ...format, + }), + coordinatorIdEnvVar: PROFILER_SHARDER_ID_ENV_VAR, + ...rest, + }); +}; + +describe('ShardedWal', () => { + beforeEach(() => { + vol.reset(); + vol.fromJSON({}, MEMFS_VOLUME); + // Clear coordinator env var for fresh state + // eslint-disable-next-line functional/immutable-data, @typescript-eslint/no-dynamic-delete + delete process.env[PROFILER_SHARDER_ID_ENV_VAR]; + // Clear measure name env var to avoid test pollution + // eslint-disable-next-line functional/immutable-data, @typescript-eslint/no-dynamic-delete + delete process.env.CP_PROFILER_MEASURE_NAME; + }); + + describe('initialization', () => { + it('should create instance with directory and format', () => { + const sw = getShardedWal(); + expect(sw).toBeInstanceOf(ShardedWal); + }); + + it('should expose a stable id via getter', () => { + const sw = getShardedWal(); + const firstId = sw.id; + expect(sw.id).toBe(firstId); + }); + + it('should use groupId from env var when measureNameEnvVar is set', () => { + // eslint-disable-next-line functional/immutable-data + process.env.CP_PROFILER_MEASURE_NAME = 'from-env'; + const sw = getShardedWal({ + measureNameEnvVar: 'CP_PROFILER_MEASURE_NAME', + }); + expect(sw.groupId).toBe('from-env'); + expect(process.env.CP_PROFILER_MEASURE_NAME).toBe('from-env'); + }); + + it('should set env var when measureNameEnvVar is provided and unset', () => { + // eslint-disable-next-line functional/immutable-data + delete process.env.CP_PROFILER_MEASURE_NAME; + const sw = getShardedWal({ + measureNameEnvVar: 'CP_PROFILER_MEASURE_NAME', + }); + expect(process.env.CP_PROFILER_MEASURE_NAME).toBe(sw.groupId); + }); + }); + + describe('path traversal validation', () => { + it('should reject groupId with forward slashes', () => { + expect(() => getShardedWal({ groupId: '../etc/passwd' })).toThrow( + 'groupId cannot contain path separators (/ or \\)', + ); + }); + + it('should reject groupId with backward slashes', () => { + expect(() => getShardedWal({ groupId: '..\\windows\\system32' })).toThrow( + 'groupId cannot contain path separators (/ or \\)', + ); + }); + + it('should reject groupId with parent directory reference', () => { + expect(() => getShardedWal({ groupId: '..' })).toThrow( + 'groupId cannot be "." or ".."', + ); + }); + + it('should reject groupId with current directory reference', () => { + expect(() => getShardedWal({ groupId: '.' })).toThrow( + 'groupId cannot be "." 
or ".."', + ); + }); + + it('should reject groupId with null bytes', () => { + expect(() => getShardedWal({ groupId: 'test\0malicious' })).toThrow( + 'groupId cannot contain null bytes', + ); + }); + + it('should reject empty groupId', () => { + expect(() => getShardedWal({ groupId: '' })).toThrow( + 'groupId cannot be empty or whitespace-only', + ); + }); + + it('should reject whitespace-only groupId', () => { + expect(() => getShardedWal({ groupId: ' ' })).toThrow( + 'groupId cannot be empty or whitespace-only', + ); + }); + + it('should accept safe alphanumeric groupId', () => { + const sw = getShardedWal({ groupId: 'safe-group-123' }); + expect(sw.groupId).toBe('safe-group-123'); + }); + + it('should accept groupId with underscores and hyphens', () => { + const sw = getShardedWal({ groupId: 'test_group-name' }); + expect(sw.groupId).toBe('test_group-name'); + }); + + it('should reject groupId from env var with path traversal', () => { + // eslint-disable-next-line functional/immutable-data + process.env.CP_PROFILER_MEASURE_NAME = '../malicious'; + expect(() => + getShardedWal({ + measureNameEnvVar: 'CP_PROFILER_MEASURE_NAME', + }), + ).toThrow('groupId cannot contain path separators (/ or \\)'); + }); + }); + + describe('shard management', () => { + it('should create shard with correct file path', () => { + const sw = getShardedWal({ + format: { baseName: 'trace', walExtension: '.log' }, + }); + const shard = sw.shard(); + expect(shard).toBeInstanceOf(WriteAheadLogFile); + // Shard files use getShardId() format (timestamp.pid.threadId.counter) + // The groupId is auto-generated and used in the shard path + // Normalize path before regex matching to handle OS-specific separators + expect(osAgnosticPath(shard.getPath())).toMatch( + /^\/shards\/\d{8}-\d{6}-\d{3}\/trace\.\d{8}-\d{6}-\d{3}(?:\.\d+){3}\.log$/, + ); + expect(shard.getPath()).toEndWithPath('.log'); + }); + + it('should create shard with default shardId when no argument provided', () => { + const sw = getShardedWal({ + format: { baseName: 'trace', walExtension: '.log' }, + }); + const shard = sw.shard(); + expect(shard.getPath()).toStartWithPath( + '/shards/20231114-221320-000/trace.20231114-221320-000.10001', + ); + expect(shard.getPath()).toEndWithPath('.log'); + }); + }); + + describe('file operations', () => { + it('should list no shard files when directory does not exist', () => { + const sw = getShardedWal({ dir: '/nonexistent' }); + const files = (sw as any).shardFiles(); + expect(files).toEqual([]); + }); + + it('should list no shard files when directory is empty', () => { + const sw = getShardedWal({ dir: '/empty' }); + vol.mkdirSync('/empty/20231114-221320-000', { recursive: true }); + const files = (sw as any).shardFiles(); + expect(files).toEqual([]); + }); + + it('should list shard files matching extension', () => { + vol.fromJSON({ + '/shards/20231114-221320-000/trace.19700101-000820-001.1.log': + 'content1', + '/shards/20231114-221320-000/trace.19700101-000820-002.2.log': + 'content2', + '/shards/other.txt': 'not a shard', + }); + + const sw = getShardedWal({ + dir: '/shards', + format: { baseName: 'trace', walExtension: '.log' }, + }); + const files = (sw as any).shardFiles(); + + expect(files).toHaveLength(2); + expect(files).toEqual( + expect.arrayContaining([ + expect.pathToMatch( + '/shards/20231114-221320-000/trace.19700101-000820-001.1.log', + ), + expect.pathToMatch( + '/shards/20231114-221320-000/trace.19700101-000820-002.2.log', + ), + ]), + ); + }); + }); + + describe('finalization', () => { + 
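+    // Path convention assumed below (see getShardedFileName / getFinalFilePath):
+    //   shard files:  <dir>/<groupId>/<baseName>.<shardId><walExtension>
+    //   final output: <dir>/<groupId>/<baseName>.<groupId><finalExtension>
+    //   e.g. '/shards/20231114-221320-000/final.20231114-221320-000.json'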
it('should finalize empty shards to empty result', () => { + const sw = getShardedWal({ + dir: '/shards', + format: { + baseName: 'final', + finalExtension: '.json', + finalizer: records => `${JSON.stringify(records)}\n`, + }, + }); + + vol.mkdirSync('/shards/20231114-221320-000', { recursive: true }); + sw.finalize(); + + expect( + read('/shards/20231114-221320-000/final.20231114-221320-000.json'), + ).toBe('[]\n'); + }); + + it('should finalize multiple shards into single file', () => { + vol.fromJSON({ + '/shards/20231114-221320-000/merged.20240101-120000-001.1.log': + 'record1\n', + '/shards/20231114-221320-000/merged.20240101-120000-002.2.log': + 'record2\n', + }); + + const sw = getShardedWal({ + dir: '/shards', + format: { + baseName: 'merged', + walExtension: '.log', + finalExtension: '.json', + finalizer: records => `${JSON.stringify(records)}\n`, + }, + }); + + sw.finalize(); + + const result = JSON.parse( + read( + '/shards/20231114-221320-000/merged.20231114-221320-000.json', + ).trim(), + ); + expect(result).toEqual(['record1', 'record2']); + }); + + it('should handle invalid entries during finalize', () => { + vol.fromJSON({ + '/shards/20231114-221320-000/final.20240101-120000-001.1.log': + 'valid\n', + '/shards/20231114-221320-000/final.20240101-120000-002.2.log': + 'invalid\n', + }); + + const sw = getShardedWal({ + dir: '/shards', + format: { + baseName: 'final', + walExtension: '.log', + finalExtension: '.json', + codec: stringCodec(), + finalizer: records => `${JSON.stringify(records)}\n`, + }, + }); + + sw.finalize(); + + const result = JSON.parse( + read( + '/shards/20231114-221320-000/final.20231114-221320-000.json', + ).trim(), + ); + expect(result).toHaveLength(2); + expect(result[0]).toBe('valid'); + expect(result[1]).toBe('invalid'); + }); + + it('should use custom options in finalizer', () => { + vol.fromJSON({ + '/shards/20231114-221320-000/final.20231114-221320-000.10001.2.1.log': + 'record1\n', + }); + + const sw = getShardedWal({ + dir: '/shards', + format: { + baseName: 'final', + walExtension: '.log', + finalExtension: '.json', + finalizer: (records, opt) => + `${JSON.stringify({ records, meta: opt })}\n`, + }, + }); + + sw.finalize({ version: '1.0', compressed: true }); + + const result = JSON.parse( + read('/shards/20231114-221320-000/final.20231114-221320-000.json'), + ); + expect(result.records).toEqual(['record1']); + expect(result.meta).toEqual({ version: '1.0', compressed: true }); + }); + }); + + describe('cleanup', () => { + it('should throw error when cleanup is called by non-coordinator', () => { + vol.fromJSON({ + '/shards/20231114-221320-000/test.20231114-221320-000.10001.2.1.log': + 'content1', + }); + const sw = getShardedWal({ + dir: '/shards', + format: { baseName: 'test', walExtension: '.log' }, + autoCoordinator: false, + }); + + // Instance won't be coordinator, so cleanup() should throw + expect(() => sw.cleanup()).toThrow( + 'cleanup() can only be called by coordinator', + ); + }); + + it('should handle cleanupIfCoordinator when not coordinator', () => { + vol.fromJSON({ + '/shards/20231114-221320-000/test.20231114-221320-000.10001.2.1.log': + 'content1', + }); + + const sw = getShardedWal({ + dir: '/shards', + format: { baseName: 'test', walExtension: '.log' }, + autoCoordinator: false, + }); + + // cleanupIfCoordinator should be no-op when not coordinator + sw.cleanupIfCoordinator(); + + // Files should still exist + expect(vol.toJSON()).not.toStrictEqual({}); + expect(sw.getState()).toBe('active'); + }); + + it('should handle 
cleanup when some shard files do not exist', () => { + vol.fromJSON({ + '/shards/20231114-221320-000/test.20231114-221320-000.10001.2.1.log': + 'content1', + }); + + const sw = getShardedWal({ + dir: '/shards', + format: { baseName: 'test', walExtension: '.log' }, + }); + + vol.unlinkSync( + '/shards/20231114-221320-000/test.20231114-221320-000.10001.2.1.log', + ); + + // cleanupIfCoordinator won't throw even if files don't exist + expect(() => sw.cleanupIfCoordinator()).not.toThrow(); + }); + + it('should ignore directory removal failures during cleanup', () => { + vol.fromJSON({ + '/shards/20231114-221320-000/test.20231114-221320-000.10001.2.1.log': + 'content1', + '/shards/20231114-221320-000/keep.txt': 'keep', + }); + + const sw = getShardedWal({ + dir: '/shards', + format: { baseName: 'test', walExtension: '.log' }, + }); + + expect(() => sw.cleanup()).not.toThrow(); + expect( + vol.readFileSync('/shards/20231114-221320-000/keep.txt', 'utf8'), + ).toBe('keep'); + }); + }); + + describe('lifecycle state', () => { + it('throws with appended finalizer error when finalize fails', () => { + const sw = getShardedWal({ + dir: '/shards', + format: { + baseName: 'test', + finalExtension: '.json', + finalizer: () => { + throw new Error('finalizer boom'); + }, + }, + }); + + expect(() => sw.finalize()).toThrow( + /Could not finalize sharded wal\. Finalizer method in format throws\./, + ); + expect(() => sw.finalize()).toThrow(/finalizer boom/); + expect(sw.getState()).toBe('active'); + }); + + it('should start in active state', () => { + const sw = getShardedWal(); + expect(sw.getState()).toBe('active'); + expect(sw.isFinalized()).toBeFalse(); + expect(sw.isCleaned()).toBeFalse(); + }); + + it('should transition to finalized state after finalize', () => { + vol.mkdirSync('/shards/20231114-221320-000', { recursive: true }); + const sw = getShardedWal({ + dir: '/shards', + format: { + baseName: 'test', + finalExtension: '.json', + finalizer: records => `${JSON.stringify(records)}\n`, + }, + }); + + sw.finalize(); + + expect(sw.getState()).toBe('finalized'); + expect(sw.isFinalized()).toBeTrue(); + expect(sw.isCleaned()).toBeFalse(); + }); + + it('should transition to cleaned state after cleanup (when coordinator)', () => { + vol.fromJSON({ + '/shards/20231114-221320-000/test.20231114-221320-000.10001.2.1.log': + 'content1', + }); + + const sw = getShardedWal({ + dir: '/shards', + format: { baseName: 'test', walExtension: '.log' }, + }); + + sw.cleanupIfCoordinator(); + + const state = sw.getState(); + expect(['active', 'cleaned']).toContain(state); + }); + + it('should make cleanup idempotent for coordinator', () => { + vol.fromJSON({ + '/shards/20231114-221320-000/test.20231114-221320-000.10001.2.1.log': + 'content1', + }); + + const sw = getShardedWal({ + dir: '/shards', + format: { baseName: 'test', walExtension: '.log' }, + }); + + sw.cleanup(); + expect(sw.getState()).toBe('cleaned'); + + expect(() => sw.cleanup()).not.toThrow(); + expect(sw.getState()).toBe('cleaned'); + }); + + it('should prevent shard creation after finalize', () => { + vol.mkdirSync('/shards/20231114-221320-000', { recursive: true }); + const sw = getShardedWal({ + dir: '/shards', + format: { + baseName: 'test', + finalExtension: '.json', + finalizer: records => `${JSON.stringify(records)}\n`, + }, + }); + + sw.finalize(); + + expect(() => sw.shard()).toThrow('WAL is finalized, cannot modify'); + }); + + it('should prevent shard creation after cleanup', () => { + vol.fromJSON({ + 
'/shards/20231114-221320-000/test.20231114-221320-000.10001.2.1.log': + 'content1', + }); + + // Generate the instance ID that will be used by the constructor + // The constructor increments ShardedWal.instanceCount, so we need to + // generate the ID using the value that will be used (current + 1) + // without actually modifying ShardedWal.instanceCount + const nextCount = ShardedWal.instanceCount + 1; + const instanceId = getUniqueInstanceId({ + next() { + return nextCount; + }, + }); + + // Set coordinator BEFORE creating instance + ShardedWal.setCoordinatorProcess(PROFILER_SHARDER_ID_ENV_VAR, instanceId); + + const sw = getShardedWal({ + dir: '/shards', + format: { baseName: 'test', walExtension: '.log' }, + }); + + sw.cleanupIfCoordinator(); + + expect(() => sw.shard()).toThrow('WAL is cleaned, cannot modify'); + }); + + it('should make finalize idempotent', () => { + vol.mkdirSync('/shards/20231114-221320-000', { recursive: true }); + const sw = getShardedWal({ + dir: '/shards', + format: { + baseName: 'test', + finalExtension: '.json', + finalizer: records => `${JSON.stringify(records)}\n`, + }, + }); + + sw.finalize(); + expect(sw.getState()).toBe('finalized'); + + // Call again - should not throw and should remain finalized + sw.finalize(); + expect(sw.getState()).toBe('finalized'); + }); + + it('should prevent finalize after cleanup', () => { + // Generate the instance ID that will be used by the constructor + // The constructor increments ShardedWal.instanceCount, so we need to + // generate the ID using the value that will be used (current + 1) + // without actually modifying ShardedWal.instanceCount + const nextCount = ShardedWal.instanceCount + 1; + const instanceId = getUniqueInstanceId({ + next() { + return nextCount; + }, + }); + + // Set coordinator BEFORE creating instance + ShardedWal.setCoordinatorProcess(PROFILER_SHARDER_ID_ENV_VAR, instanceId); + + const sw = getShardedWal({ + dir: '/shards', + format: { + baseName: 'test', + walExtension: '.log', + finalExtension: '.json', + finalizer: records => `${JSON.stringify(records)}\n`, + }, + }); + + expect(sw.stats.shardFiles).toHaveLength(0); + sw.shard(); + expect(sw.stats.shardFiles).toHaveLength(0); + + sw.cleanupIfCoordinator(); + expect(sw.getState()).toBe('cleaned'); + expect(sw.stats.shardFiles).toHaveLength(0); + }); + }); +}); diff --git a/packages/utils/src/lib/wal.int.test.ts b/packages/utils/src/lib/wal.int.test.ts new file mode 100644 index 000000000..81c71709b --- /dev/null +++ b/packages/utils/src/lib/wal.int.test.ts @@ -0,0 +1,127 @@ +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { + type Codec, + type WalRecord, + WriteAheadLogFile, + stringCodec, +} from './wal.js'; + +describe('WriteAheadLogFile Integration', () => { + const testDir = path.join(process.cwd(), 'tmp', 'int', 'utils', 'wal'); + let walFile: WriteAheadLogFile; + + beforeEach(async () => { + // Clean up test directory + await fs.rm(testDir, { recursive: true, force: true }); + await fs.mkdir(testDir, { recursive: true }); + }); + + afterEach(async () => { + if (walFile && !walFile.isClosed()) { + walFile.close(); + } + await fs.rm(testDir, { recursive: true, force: true }); + }); + + it('should recover from file with partial write', async () => { + const filePath = path.join(testDir, 'partial.log'); + walFile = new WriteAheadLogFile({ file: filePath, codec: stringCodec() }); + + walFile.open(); + walFile.append('complete1'); + 
walFile.append('complete2'); + walFile.close(); + + // Simulate partial write by appending incomplete line + await fs.appendFile(filePath, '"partial'); + + const recovered = walFile.recover(); + expect(recovered.records).toEqual(['complete1', 'complete2']); + expect(recovered.partialTail).toBe('"partial'); + }); + + it('should repack file removing invalid entries', () => { + const filePath = path.join(testDir, 'repack.log'); + const tolerantCodec: Codec = { + encode: v => (typeof v === 'string' ? v : JSON.stringify(v)), + decode: (s: string) => { + if (s === 'invalid') throw new Error('Invalid record'); + return s; + }, + }; + + walFile = new WriteAheadLogFile({ file: filePath, codec: tolerantCodec }); + walFile.open(); + walFile.append('valid1'); + walFile.append('invalid'); + walFile.append('valid2'); + walFile.close(); + + walFile.repack(); + + const recovered = walFile.recover(); + expect(recovered.records).toEqual(['valid1', 'valid2']); + }); + + it('should handle error recovery scenarios', () => { + const filePath = path.join(testDir, 'errors.log'); + const failingCodec: Codec = { + encode: v => (typeof v === 'string' ? v : JSON.stringify(v)), + decode: (s: string) => { + if (s === 'bad') throw new Error('Bad record'); + return s; + }, + }; + + walFile = new WriteAheadLogFile({ file: filePath, codec: failingCodec }); + walFile.open(); + walFile.append('good'); + walFile.append('bad'); + walFile.append('good'); + walFile.close(); + + const recovered = walFile.recover(); + expect(recovered.records).toEqual([ + 'good', + { __invalid: true, raw: 'bad' }, + 'good', + ]); + expect(recovered.errors).toEqual([]); + }); + + it('should handle object records correctly', () => { + const filePath = path.join(testDir, 'objects.log'); + walFile = new WriteAheadLogFile({ + file: filePath, + codec: stringCodec(), + }); + + walFile.open(); + walFile.append({ id: 1, name: 'test1' }); + walFile.append({ id: 2, name: 'test2' }); + walFile.close(); + + const recovered = walFile.recover(); + expect(recovered.records).toEqual([ + { id: 1, name: 'test1' }, + { id: 2, name: 'test2' }, + ]); + }); + + it('should perform complete write/recover cycle', () => { + const filePath = path.join(testDir, 'test.log'); + walFile = new WriteAheadLogFile({ file: filePath, codec: stringCodec() }); + + walFile.open(); + walFile.append('record1'); + walFile.append('record2'); + walFile.close(); + + const recovered = walFile.recover(); + expect(recovered.records).toEqual(['record1', 'record2']); + expect(recovered.errors).toEqual([]); + expect(recovered.partialTail).toBeNull(); + }); +}); diff --git a/packages/utils/src/lib/wal.ts b/packages/utils/src/lib/wal.ts index f0dc87a83..a4d1453ce 100644 --- a/packages/utils/src/lib/wal.ts +++ b/packages/utils/src/lib/wal.ts @@ -3,6 +3,11 @@ import * as fs from 'node:fs'; import path from 'node:path'; import process from 'node:process'; import { threadId } from 'node:worker_threads'; +import { + type Counter, + getUniqueInstanceId, + getUniqueTimeId, +} from './process-id.js'; /** * Codec for encoding/decoding values to/from strings for WAL storage. @@ -372,8 +377,14 @@ export function setCoordinatorProcess( } } -// eslint-disable-next-line functional/no-let -let shardCount = 0; +/** + * Simple counter implementation for generating sequential IDs. 
+ */ +const shardCounter: Counter = (() => { + // eslint-disable-next-line functional/no-let + let count = 0; + return { next: () => ++count }; +})(); /** * Generates a unique sharded WAL ID based on performance time origin, process ID, thread ID, and instance count. @@ -383,60 +394,6 @@ function getShardedWalId() { return `${Math.round(performance.timeOrigin)}.${process.pid}.${threadId}.${++ShardedWal.instanceCount}`; } -/** - * Generates a human-readable shard ID. - * This ID is unique per process/thread/shard combination and used in the file name. - * Format: readable-timestamp.pid.threadId.shardCount - * Example: "20240101-120000-000.12345.1.1" - * Becomes file: trace.20240101-120000-000.12345.1.1.log - */ -export function getShardId(): string { - const timestamp = Math.round(performance.timeOrigin + performance.now()); - const readableTimestamp = sortableReadableDateString(`${timestamp}`); - return `${readableTimestamp}.${process.pid}.${threadId}.${++shardCount}`; -} - -/** - * Generates a human-readable sharded group ID. - * This ID is a globally unique, sortable, human-readable date string per run. - * Used directly as the folder name to group shards. - * Format: yyyymmdd-hhmmss-ms - * Example: "20240101-120000-000" - */ -export function getShardedGroupId(): string { - return sortableReadableDateString( - `${Math.round(performance.timeOrigin + performance.now())}`, - ); -} - -/** - * Regex patterns for validating WAL ID formats - */ -export const WAL_ID_PATTERNS = { - /** Readable date format: yyyymmdd-hhmmss-ms */ - READABLE_DATE: /^\d{8}-\d{6}-\d{3}$/, - /** Group ID format: yyyymmdd-hhmmss-ms */ - GROUP_ID: /^\d{8}-\d{6}-\d{3}$/, - /** Shard ID format: readable-date.pid.threadId.count */ - SHARD_ID: /^\d{8}-\d{6}-\d{3}(?:\.\d+){3}$/, -} as const; - -export function sortableReadableDateString(timestampMs: string): string { - const timestamp = Number.parseInt(timestampMs, 10); - const date = new Date(timestamp); - const MILLISECONDS_PER_SECOND = 1000; - const yyyy = date.getFullYear(); - const mm = String(date.getMonth() + 1).padStart(2, '0'); - const dd = String(date.getDate()).padStart(2, '0'); - const hh = String(date.getHours()).padStart(2, '0'); - const min = String(date.getMinutes()).padStart(2, '0'); - const ss = String(date.getSeconds()).padStart(2, '0'); - // eslint-disable-next-line @typescript-eslint/no-magic-numbers - const ms = String(timestamp % MILLISECONDS_PER_SECOND).padStart(3, '0'); - - return `${yyyy}${mm}${dd}-${hh}${min}${ss}-${ms}`; -} - /** * Ensures a directory exists, creating it recursively if necessary using sync methods. * @param dirPath - The directory path to ensure exists @@ -446,177 +403,3 @@ function ensureDirectoryExistsSync(dirPath: string): void { fs.mkdirSync(dirPath, { recursive: true }); } } - -/** - * Generates a path to a shard file using human-readable IDs. - * Both groupId and shardId are already in readable date format. 
- * - * Example with groupId "20240101-120000-000" and shardId "20240101-120000-000.12345.1.1": - * Full path: /base/20240101-120000-000/trace.20240101-120000-000.12345.1.1.log - * - * @param opt.dir - The directory to store the shard file - * @param opt.format - The WalFormat to use for the shard file - * @param opt.groupId - The human-readable group ID (yyyymmdd-hhmmss-ms format) - * @param opt.shardId - The human-readable shard ID (readable-timestamp.pid.threadId.count format) - * @returns The path to the shard file - */ -export function getShardedPath(opt: { - dir?: string; - format: WalFormat; - groupId: string; - shardId: string; -}): string { - const { dir = '', format, groupId, shardId } = opt; - const { baseName, walExtension } = format; - - return path.join(dir, groupId, `${baseName}.${shardId}${walExtension}`); -} - -export function getShardedFinalPath(opt: { - dir?: string; - format: WalFormat; - groupId: string; -}): string { - const { dir = '', format, groupId } = opt; - const { baseName, finalExtension } = format; - - return path.join(dir, groupId, `${baseName}.${groupId}${finalExtension}`); -} - -/** - * Sharded Write-Ahead Log manager for coordinating multiple WAL shards. - * Handles distributed logging across multiple processes/files with atomic finalization. - */ - -export class ShardedWal { - static instanceCount = 0; - readonly #id: string = getShardedWalId(); - readonly groupId = getShardedGroupId(); - readonly #format: WalFormat; - readonly #dir: string = process.cwd(); - readonly #isCoordinator: boolean; - - /** - * Create a sharded WAL manager. - * - * @param opt.dir - Base directory to store shard files (defaults to process.cwd()) - * @param opt.format - WAL format configuration - * @param opt.groupId - Group ID for sharding (defaults to generated group ID) - * @param opt.coordinatorIdEnvVar - Environment variable name for storing coordinator ID (defaults to CP_SHARDED_WAL_COORDINATOR_ID) - */ - constructor(opt: { - dir?: string; - format: Partial>; - groupId?: string; - coordinatorIdEnvVar: string; - }) { - const { dir, format, groupId, coordinatorIdEnvVar } = opt; - this.groupId = groupId ?? getShardedGroupId(); - if (dir) { - this.#dir = dir; - } - this.#format = parseWalFormat(format); - this.#isCoordinator = isCoordinatorProcess(coordinatorIdEnvVar, this.#id); - } - - /** - * Is this instance the coordinator? - * - * Coordinator status is determined from the coordinatorIdEnvVar environment variable. - * The coordinator handles finalization and cleanup of shard files. 
- * - * @returns true if this instance is the coordinator, false otherwise - */ - isCoordinator(): boolean { - return this.#isCoordinator; - } - - shard(shardId: string = getShardId()) { - return new WriteAheadLogFile({ - file: getShardedPath({ - dir: this.#dir, - format: this.#format, - groupId: this.groupId, - shardId, - }), - codec: this.#format.codec, - }); - } - - /** Get all shard file paths matching this WAL's base name */ - private shardFiles() { - if (!fs.existsSync(this.#dir)) { - return []; - } - - const groupIdDir = path.dirname( - getShardedFinalPath({ - dir: this.#dir, - format: this.#format, - groupId: this.groupId, - }), - ); - // create dir if not existing - ensureDirectoryExistsSync(groupIdDir); - - return fs - .readdirSync(groupIdDir) - .filter(entry => entry.endsWith(this.#format.walExtension)) - .filter(entry => entry.startsWith(`${this.#format.baseName}`)) - .map(entry => path.join(groupIdDir, entry)); - } - - /** - * Finalize all shards by merging them into a single output file. - * Recovers all records from all shards, validates no errors, and writes merged result. - * @throws Error if any shard contains decode errors - */ - finalize(opt?: Record) { - const fileRecoveries = this.shardFiles().map(f => ({ - file: f, - recovery: new WriteAheadLogFile({ - file: f, - codec: this.#format.codec, - }).recover(), - })); - - const records = fileRecoveries.flatMap(({ recovery }) => recovery.records); - - // Check if any records are invalid entries (from tolerant codec) - const hasInvalidEntries = records.some( - r => typeof r === 'object' && r != null && '__invalid' in r, - ); - - const recordsToFinalize = hasInvalidEntries - ? records - : filterValidRecords(records); - const out = getShardedFinalPath({ - dir: this.#dir, - format: this.#format, - groupId: this.groupId, - }); - ensureDirectoryExistsSync(path.dirname(out)); - fs.writeFileSync(out, this.#format.finalizer(recordsToFinalize, opt)); - } - - cleanup() { - this.shardFiles().forEach(f => { - // Remove the shard file - fs.unlinkSync(f); - // Remove the parent directory (shard group directory) - const shardDir = path.dirname(f); - try { - fs.rmdirSync(shardDir); - } catch { - // Directory might not be empty or already removed, ignore - } - }); - - // Also try to remove the root directory if it becomes empty - try { - fs.rmdirSync(this.#dir); - } catch { - // Directory might not be empty or already removed, ignore - } - } -} diff --git a/packages/utils/src/lib/wal.unit.test.ts b/packages/utils/src/lib/wal.unit.test.ts index 4221d4f0f..179fb7a1b 100644 --- a/packages/utils/src/lib/wal.unit.test.ts +++ b/packages/utils/src/lib/wal.unit.test.ts @@ -5,12 +5,9 @@ import { type Codec, type InvalidEntry, ShardedWal, - WAL_ID_PATTERNS, WriteAheadLogFile, createTolerantCodec, filterValidRecords, - getShardId, - getShardedGroupId, isCoordinatorProcess, parseWalFormat, recoverFromContent, @@ -504,71 +501,6 @@ describe('stringCodec', () => { }); }); -describe('getShardId', () => { - it('should generate shard ID with readable timestamp', () => { - const result = getShardId(); - - expect(result).toMatch(WAL_ID_PATTERNS.SHARD_ID); - expect(result).toStartWith('20231114-221320-000.'); - }); - - it('should generate different shard IDs for different calls', () => { - const result1 = getShardId(); - const result2 = getShardId(); - - expect(result1).not.toBe(result2); - expect(result1).toStartWith('20231114-221320-000.'); - expect(result2).toStartWith('20231114-221320-000.'); - }); - - it('should handle zero values', () => { - const result = 
getShardId(); - expect(result).toStartWith('20231114-221320-000.'); - }); - - it('should handle negative timestamps', () => { - const result = getShardId(); - - expect(result).toStartWith('20231114-221320-000.'); - }); - - it('should handle large timestamps', () => { - const result = getShardId(); - - expect(result).toStartWith('20231114-221320-000.'); - }); - - it('should generate incrementing counter', () => { - const result1 = getShardId(); - const result2 = getShardId(); - - const parts1 = result1.split('.'); - const parts2 = result2.split('.'); - const counter1 = parts1.at(-1) as string; - const counter2 = parts2.at(-1) as string; - - expect(Number.parseInt(counter1, 10)).toBe( - Number.parseInt(counter2, 10) - 1, - ); - }); -}); - -describe('getShardedGroupId', () => { - it('should work with mocked timeOrigin', () => { - const result = getShardedGroupId(); - - expect(result).toBe('20231114-221320-000'); - expect(result).toMatch(WAL_ID_PATTERNS.GROUP_ID); - }); - - it('should be idempotent within same process', () => { - const result1 = getShardedGroupId(); - const result2 = getShardedGroupId(); - - expect(result1).toBe(result2); - }); -}); - describe('parseWalFormat', () => { it('should apply all defaults when given empty config', () => { const result = parseWalFormat({}); @@ -819,7 +751,7 @@ describe('ShardedWal', () => { }, coordinatorIdEnvVar: SHARDED_WAL_COORDINATOR_ID_ENV_VAR, }); - // Create the group directory (matches actual getShardedGroupId() output) + // Create the group directory (matches actual getUniqueTimeId() output) vol.mkdirSync('/empty/20231114-221320-000', { recursive: true }); const files = (sw as any).shardFiles(); expect(files).toEqual([]); diff --git a/testing/test-setup-config/src/lib/vitest-setup-files.ts b/testing/test-setup-config/src/lib/vitest-setup-files.ts index ccc34bbea..50aa9c9f8 100644 --- a/testing/test-setup-config/src/lib/vitest-setup-files.ts +++ b/testing/test-setup-config/src/lib/vitest-setup-files.ts @@ -27,6 +27,7 @@ const UNIT_TEST_SETUP_FILES = [ '../../testing/test-setup/src/lib/performance.setup-file.ts', '../../testing/test-setup/src/lib/portal-client.mock.ts', '../../testing/test-setup/src/lib/process.setup-file.ts', + '../../testing/test-setup/src/lib/os.setup-file.ts', ...CUSTOM_MATCHERS, ] as const; diff --git a/testing/test-setup/src/lib/os.setup-file.ts b/testing/test-setup/src/lib/os.setup-file.ts new file mode 100644 index 000000000..6432fa375 --- /dev/null +++ b/testing/test-setup/src/lib/os.setup-file.ts @@ -0,0 +1,14 @@ +import os from 'node:os'; +import { afterEach, beforeEach, vi } from 'vitest'; + +export const MOCK_AVAILABLE_PARALLELISM = 1; + +const availableParallelismMock = vi.spyOn(os, 'availableParallelism'); + +beforeEach(() => { + availableParallelismMock.mockReturnValue(MOCK_AVAILABLE_PARALLELISM); +}); + +afterEach(() => { + availableParallelismMock.mockClear(); +}); diff --git a/testing/test-setup/tsconfig.lib.json b/testing/test-setup/tsconfig.lib.json index 3cc313086..a01be080b 100644 --- a/testing/test-setup/tsconfig.lib.json +++ b/testing/test-setup/tsconfig.lib.json @@ -3,7 +3,7 @@ "compilerOptions": { "outDir": "../dist/out-tsc", "declaration": true, - "types": ["node"] + "types": ["vitest/globals", "vitest/importMeta", "vite/client", "node"] }, "include": ["src/**/*.ts"], "exclude": [ diff --git a/testing/test-utils/src/index.ts b/testing/test-utils/src/index.ts index 38ce50155..5c4460e85 100644 --- a/testing/test-utils/src/index.ts +++ b/testing/test-utils/src/index.ts @@ -10,5 +10,5 @@ export * from 
'./lib/utils/file-system.js'; export * from './lib/utils/create-npm-workshpace.js'; export * from './lib/utils/project-graph.js'; export * from './lib/utils/test-folder-setup.js'; -export * from './lib/utils/omit-trace-json.js'; export * from './lib/utils/profiler.mock.js'; +export * from './lib/utils/omit-trace-json.js';
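
A minimal sketch of the Counter contract now shared between process-id.ts and wal.ts, assuming the same relative import the diff itself uses; the closure counter mirrors the shardCounter constant above, and the final assertion is illustrative only.

import { type Counter, getUniqueInstanceId } from './process-id.js';

// Closure-based counter, same shape as the shardCounter added to wal.ts:
// the mutable count stays private to the closure, callers only see next().
const counter: Counter = (() => {
  // eslint-disable-next-line functional/no-let
  let count = 0;
  return { next: () => ++count };
})();

// Each call yields timeId.pid.threadId.counter, so two calls from the same
// process and thread still differ in the trailing counter segment.
const first = getUniqueInstanceId(counter);
const second = getUniqueInstanceId(counter);
console.assert(first !== second, 'counter advances on every call');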
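
The lifecycle tests claim coordinator status before constructing the instance; a standalone sketch of that hand-off follows. ShardedWal.instanceCount, ShardedWal.setCoordinatorProcess, and getUniqueInstanceId all appear in the diff, but the import location for ShardedWal and the env-var literal (taken from the default documented in the removed wal.ts code) are assumptions.

import { getUniqueInstanceId } from './process-id.js';
// Assumed import location; the class was moved out of wal.ts in this PR.
import { ShardedWal } from './sharded-wal.js';

// The constructor increments ShardedWal.instanceCount itself, so predict the
// ID it will generate (current + 1) without mutating the static counter.
const nextCount = ShardedWal.instanceCount + 1;
const instanceId = getUniqueInstanceId({ next: () => nextCount });

// Claim coordinator status before constructing, so the instance created next
// is the one allowed to run cleanup() / cleanupIfCoordinator().
ShardedWal.setCoordinatorProcess('CP_SHARDED_WAL_COORDINATOR_ID', instanceId);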
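
The "custom options in finalizer" test shows how metadata passed to finalize() reaches the merged output; the format object it relies on looks roughly like the sketch below, with the Record<string, unknown> annotation assumed from the previous finalize(opt?) signature.

// Shards are written as trace.<shardId>.log inside the group directory and
// finalize() merges them into trace.<groupId>.json using this finalizer.
const formatWithMeta = {
  baseName: 'trace',
  walExtension: '.log',
  finalExtension: '.json',
  // finalize({ version: '1.0', compressed: true }) arrives here as `opt`, so
  // the merged file carries both the recovered records and the run metadata.
  finalizer: (records: unknown[], opt?: Record<string, unknown>) =>
    `${JSON.stringify({ records, meta: opt })}\n`,
};

In the tests this object is passed through the getShardedWal helper; in application code it is the format option handed to the sharded WAL.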