From bce9f1ffa31ae9fc2ac5d78e17ea397524e46265 Mon Sep 17 00:00:00 2001 From: bencmbrook <7354176+bencmbrook@users.noreply.github.com> Date: Thu, 19 Mar 2026 15:27:35 -0400 Subject: [PATCH 01/10] Port multi-identifier preference uploads. Bring the legacy CLI preference-upload refactor into the monorepo, including the new configure flow, schema-backed parallel uploads, receipts state, the upload skill, and the reconcile helper script. Made-with: Cursor --- .changeset/late-terms-apply.md | 7 + .../skills/preference-data-upload/SKILL.md | 429 +++++++++ packages/cli/README.md | 94 +- packages/cli/package.json | 1 + .../scripts/reconcile-preference-records.ts | 846 ++++++++++++++++++ .../configure-preference-upload/command.ts | 53 ++ .../configure-preference-upload/impl.ts | 363 ++++++++ packages/cli/src/commands/consent/routes.ts | 2 + .../artifacts/receipts/index.ts | 1 + .../artifacts/receipts/receiptsState.ts | 147 +++ .../receipts/tests/receiptsState.test.ts | 53 ++ .../upload-preferences/buildTaskOptions.ts | 86 ++ .../consent/upload-preferences/command.ts | 114 ++- .../consent/upload-preferences/impl.ts | 418 +++++++-- .../consent/upload-preferences/readme.ts | 6 +- .../consent/upload-preferences/schemaState.ts | 89 ++ .../upload/batchUploader.ts | 1 + .../upload/buildInteractiveUploadPlan.ts | 183 ++++ .../upload-preferences/upload/index.ts | 6 + .../interactivePreferenceUploaderFromPlan.ts | 343 +++++++ .../upload/transform/buildPendingUpdates.ts | 143 +++ .../upload/transform/index.ts | 2 + .../upload/transform/transformCsv.ts | 80 ++ .../upload-preferences/upload/types.ts | 8 + .../consent/upload-preferences/worker.ts | 194 ++++ .../src/lib/graphql/gqls/RequestDataSilo.ts | 1 + packages/cli/src/lib/pooling/logRotation.ts | 17 +- .../src/lib/preference-management/codecs.ts | 80 +- .../getPreferencesForIdentifiers.ts | 177 +++- .../src/lib/preference-management/index.ts | 3 +- .../parsePreferenceAndPurposeValuesFromCsv.ts | 175 +++- ...ts => 
parsePreferenceFileFormatFromCsv.ts} | 52 +- .../parsePreferenceIdentifiersFromCsv.ts | 244 +++-- .../parsePreferenceManagementCsv.ts | 219 +++-- .../fetchConsentPreferencesChunked.test.ts | 2 +- .../getPreferencesForIdentifiers.test.ts | 31 +- ...ferenceManagementPreferencesInteractive.ts | 267 ------ packages/cli/src/lib/tests/codebase.test.ts | 10 +- 38 files changed, 4258 insertions(+), 689 deletions(-) create mode 100644 .changeset/late-terms-apply.md create mode 100644 .cursor/skills/preference-data-upload/SKILL.md create mode 100644 packages/cli/scripts/reconcile-preference-records.ts create mode 100644 packages/cli/src/commands/consent/configure-preference-upload/command.ts create mode 100644 packages/cli/src/commands/consent/configure-preference-upload/impl.ts create mode 100644 packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/receiptsState.ts create mode 100644 packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/tests/receiptsState.test.ts create mode 100644 packages/cli/src/commands/consent/upload-preferences/buildTaskOptions.ts create mode 100644 packages/cli/src/commands/consent/upload-preferences/schemaState.ts create mode 100644 packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts create mode 100644 packages/cli/src/commands/consent/upload-preferences/upload/index.ts create mode 100644 packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts create mode 100644 packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts create mode 100644 packages/cli/src/commands/consent/upload-preferences/upload/transform/index.ts create mode 100644 packages/cli/src/commands/consent/upload-preferences/upload/transform/transformCsv.ts create mode 100644 packages/cli/src/commands/consent/upload-preferences/upload/types.ts create mode 100644 packages/cli/src/commands/consent/upload-preferences/worker.ts rename 
packages/cli/src/lib/preference-management/{parsePreferenceTimestampsFromCsv.ts => parsePreferenceFileFormatFromCsv.ts} (54%) delete mode 100644 packages/cli/src/lib/preference-management/uploadPreferenceManagementPreferencesInteractive.ts diff --git a/.changeset/late-terms-apply.md b/.changeset/late-terms-apply.md new file mode 100644 index 00000000..1fb6190a --- /dev/null +++ b/.changeset/late-terms-apply.md @@ -0,0 +1,7 @@ +--- +'@transcend-io/cli': major +--- + +Port the multi-identifier preference upload workflow into the monorepo CLI. + +This adds `consent configure-preference-upload`, moves `upload-preferences` to the schema-backed parallel worker flow, and includes the supporting receipts state, preference upload skill, and reconcile script. diff --git a/.cursor/skills/preference-data-upload/SKILL.md b/.cursor/skills/preference-data-upload/SKILL.md new file mode 100644 index 00000000..b18dce28 --- /dev/null +++ b/.cursor/skills/preference-data-upload/SKILL.md @@ -0,0 +1,429 @@ + + + +## Table of Contents + +- [Preference Data Upload Pipeline](#preference-data-upload-pipeline) + - [Prerequisites](#prerequisites) + - [Phase 1: Receive & Transform Raw Data](#phase-1-receive--transform-raw-data) + - [1.1 Set up working directory](#11-set-up-working-directory) + - [1.2 Ask the user](#12-ask-the-user) + - [1.3 Write the transform script](#13-write-the-transform-script) + - [1.4 Run and verify counts](#14-run-and-verify-counts) + - [Phase 2: Chunk Large Files](#phase-2-chunk-large-files) + - [2.1 Verify chunk counts](#21-verify-chunk-counts) + - [Phase 3: Generate & Validate Config](#phase-3-generate--validate-config) + - [3.1 Run interactive config](#31-run-interactive-config) + - [3.2 Validate the config](#32-validate-the-config) + - [Phase 4: Test Upload](#phase-4-test-upload) + - [4.1 Ask the user](#41-ask-the-user) + - [4.2 Run test upload](#42-run-test-upload) + - [4.3 Generate verification links](#43-generate-verification-links) + - [4.4 API verification 
script](#44-api-verification-script) + - [Phase 5: Full Upload](#phase-5-full-upload) + - [5.1 Key flags for production uploads](#51-key-flags-for-production-uploads) + - [5.2 Copy config and run](#52-copy-config-and-run) + - [5.3 Monitor progress](#53-monitor-progress) + - [5.4 Resumability](#54-resumability) + - [Phase 6: Error Analysis & Cleanup](#phase-6-error-analysis--cleanup) + - [Phase 7: Executive Summary](#phase-7-executive-summary) + + + +--- + +name: preference-data-upload +description: End-to-end workflow for uploading preference data to Transcend via the CLI. Covers receiving raw files, running transformation scripts, chunking, interactive config generation, test uploads with verification, and full production uploads. Use when the user mentions uploading preferences, preference migration, consent data upload, airtable upload, or bulk preference import. + +--- + +# Preference Data Upload Pipeline + +End-to-end workflow for uploading preference/consent data to Transcend's preference store via the CLI. + +## Prerequisites + +- Transcend CLI built and available (`pnpm build` then `pnpm start `) +- A valid `TRANSCEND_API_KEY` with scopes: `Manage Preference Store`, `View Preference Store` +- The partition UUID for the target preference store +- The `--transcendUrl` for the org (e.g. 
`https://api.us.transcend.io` for US-backed) + +## Phase 1: Receive & Transform Raw Data + +### 1.1 Set up working directory + +``` +working// +├── raw/ # Original files from customer (never modify) +├── exclusions/ # Block/suppression lists +├── transform.py # or transform.ts — transformation script +├── output/ +│ ├── batch_a/ # Chunked files for first-pass upload +│ ├── batch_b/ # Chunked files for second-pass (if split timestamps) +│ ├── test/ # Small subset for test uploads +│ └── all_chunks/ # Symlinks to all chunks (for config scanning) +└── README.md # Document the pipeline for this project +``` + +### 1.2 Ask the user + +- **Language preference**: Python (pandas) or TypeScript for the transform script? +- **Source files**: What are the raw input files and their schemas? +- **Exclusion list**: Is there a block/suppression list to filter out? +- **Column mappings**: Which columns map to email, name, country, purposes, timestamps? +- **Timestamp splitting**: Do any records have multiple consent timestamps that need splitting across batches? +- **Duplicate handling**: How should duplicate emails within a single source be handled? + - **Keep last** (default): `df.sort_values('timestamp').drop_duplicates(subset='email_lower', keep='last')` + - **Keep first**: Keep the earliest record + - **Skip dedup**: Let the uploader handle it (it appends `___` and API resolves by timestamp — works but generates warnings) + +### 1.3 Write the transform script + +The script must: + +1. Load and deduplicate the exclusion list (lowercase + strip emails) +2. Read source files with `dtype=str` (Python) to preserve original values +3. Rename columns to the standardized schema +4. Clean placeholder values (`[none]`, whitespace) in name/country fields +5. Filter out excluded emails +6. Filter out records with no valid timestamp +7. **Deduplicate by email** within each source (sort by timestamp, keep last) +8. 
Handle timestamp splitting (batch_a = earliest consent, batch_b = later consent) +9. Merge overlapping records across sources (e.g. Marketo + Iterable country merge) +10. Write `output/batch_a/batch_a.csv` and `output/batch_b/batch_b.csv` +11. Run sanity checks: no exclusion leaks, print record counts + +**Target output columns** (adjust per project): + +``` +email, firstName, lastName, _country, _Subscribed, +_consent_date, timestamp +``` + +**Purpose column values**: `True`, `False`, or empty string (empty = no preference, will map to null). + +### 1.4 Run and verify counts + +```bash +cd working/ +python3 transform.py # or: npx ts-node transform.ts +``` + +Cross-reference output counts with customer-provided numbers. Account for: + +- Exclusion list removals +- Records with no timestamp (dropped) +- Duplicate emails across sources (merged, not double-counted) +- Timestamp splits creating batch_b rows + +## Phase 2: Chunk Large Files + +Files over ~10MB should be chunked for parallel upload. The `chunk-csv` command defaults to 10MB chunks. 
+ +```bash +pnpm start admin chunk-csv \ + --directory ./working//output/batch_a/ \ + --chunkSizeMB 10 + +pnpm start admin chunk-csv \ + --directory ./working//output/batch_b/ \ + --chunkSizeMB 10 +``` + +After chunking, move the originals out so they don't get scanned/uploaded: + +```bash +mv output/batch_a/batch_a.csv output/batch_a_original.csv +mv output/batch_b/batch_b.csv output/batch_b_original.csv +``` + +Create symlink directory for config scanning and test subset: + +```bash +mkdir -p output/all_chunks output/test + +# Symlink all chunks +for f in output/batch_a/batch_a_chunk_*.csv; do ln -s "../$f" output/all_chunks/; done +for f in output/batch_b/batch_b_chunk_*.csv; do ln -s "../$f" output/all_chunks/; done + +# Test subset +head -101 output/batch_a/batch_a_chunk_0001.csv > output/test/test_100.csv +cp output/batch_a/batch_a_chunk_0001.csv output/test/ +``` + +### 2.1 Verify chunk counts + +**IMPORTANT**: Always verify row counts after chunking using `test_csv_count.sh`: + +```bash +bash test_csv_count.sh -H ./working//output/batch_a/ +bash test_csv_count.sh -H ./working//output/batch_b/ +``` + +Cross-reference totals against the transform script output: + +| Batch | Expected Records | Actual Records | Chunks | +| ------- | ---------------- | -------------- | ------ | +| batch_a | X | X | N | +| batch_b | X | X | N | +| Total | X | X | N | + +Do NOT proceed to upload until counts match. + +> **Known issue — auto-chunk re-chunking**: The upload command has a built-in auto-chunk +> (default 11MB) that re-processes files. Since `chunk-csv` defaults to 10MB, some chunks +> land slightly over 10MB. The upload's 11MB threshold avoids re-chunking these. If you +> still see re-chunking, pass `--chunkSizeMB 0` to the upload command to disable it. +> Re-chunking creates duplicate files with `_chunk_0001` suffix that must be cleaned up. 
+ +## Phase 3: Generate & Validate Config + +### 3.1 Run interactive config + +```bash +pnpm start consent configure-preference-upload \ + --auth $TRANSCEND_API_KEY \ + --partition \ + --directory ./working//output/all_chunks/ \ + --transcendUrl +``` + +The interactive flow walks through 6 steps: + +1. **Identifier columns** — select email + any secondary identifiers (firstName, lastName) +2. **Identifier mapping** — map each to org identifier names, mark which are unique +3. **Timestamp column** — select the column used for "last preference update" +4. **Purpose columns** — select which columns map to purposes/preferences +5. **Value mapping** — map each unique value to opted-in/opted-out/null. Empty strings auto-map to null (no preference). `True`/`False` defaults are auto-detected. +6. **Metadata columns** — select which remaining columns to INCLUDE as metadata (unselected are ignored) + +### 3.2 Validate the config + +Read the generated `preference-upload-schema.json` and verify: + +- **columnToIdentifier**: correct names, `email` marked as unique +- **timestampColumn**: points to the right column +- **columnToPurposeName**: each purpose column maps to correct org purpose, valueMapping includes `"": null` for empty strings +- **columnToMetadata**: country columns or other useful metadata included +- **columnsToIgnore**: consent date columns or other non-upload columns excluded + +Compare against any previous upload's config if available. + +## Phase 4: Test Upload + +### 4.1 Ask the user + +- **Test size**: How many records? (default: 100) +- **Corner cases**: Any specific scenarios to verify? (e.g. empty purpose values, records with only one consent type, overlapping source records) +- **Verification links**: Generate Transcend dashboard links for spot-checking + +### 4.2 Run test upload + +**Important**: The upload command does NOT have a `--file` flag. It processes all CSV files in `--directory`. 
To upload only a specific file, ensure it's the only CSV in the directory. + +Move extra files out of the test directory before uploading: + +```bash +cp output/preference-upload-schema.json output/test/ +# If batch_a_chunk_0001.csv is also in test/, move it out temporarily +mv output/test/batch_a_chunk_0001.csv output/test/batch_a_chunk_0001.csv.bak +``` + +First do a dry run: + +```bash +pnpm start consent upload-preferences \ + --auth $TRANSCEND_API_KEY \ + --partition \ + --directory ./working//output/test/ \ + --transcendUrl \ + --concurrency 1 \ + --dryRun +``` + +Verify: `PendingSafe` should equal total rows, `PendingConflicts: 0`, `Skipped: 0`. + +Then upload for real (drop `--dryRun`): + +```bash +pnpm start consent upload-preferences \ + --auth $TRANSCEND_API_KEY \ + --partition \ + --directory ./working//output/test/ \ + --transcendUrl \ + --concurrency 1 +``` + +Restore the chunk file after test: + +```bash +mv output/test/batch_a_chunk_0001.csv.bak output/test/batch_a_chunk_0001.csv +``` + +### 4.3 Generate verification links + +Extract sample emails and build dashboard URLs. URLs must be URL-encoded: + +``` +https://app.transcend.io/preference-store/user-preferences?filters=%7B%22identifiers%22%3A%5B%7B%22name%22%3A%22email%22%2C%22value%22%3A%22%22%7D%5D%7D +``` + +Note: `@` encodes to `%40`. Do NOT use raw JSON in the URL — it won't work. 
+ +Pick emails covering these scenarios: + +- Has both purposes opted-in +- Has one opted-in, one empty (should show no preference for the empty one) +- Has both opted-out +- Has mixed (one opted-in, one opted-out) +- Has metadata (country) populated, especially split-source metadata (different countries) +- From each data source if multiple + +### 4.4 API verification script + +Create a verification script to query the API directly and validate results programmatically: + +```bash +#!/usr/bin/env bash +SOMBRA_URL="https://multi-tenant.sombra.us.transcend.io" # adjust for EU +PARTITION="" + +query_email() { + local email="$1" label="$2" + echo "=== $label: $email ===" + curl -s "${SOMBRA_URL}/v1/preferences/${PARTITION}/query" \ + -H "Authorization: Bearer ${TRANSCEND_API_KEY}" \ + -H "Content-Type: application/json" \ + -d "{\"filter\":{\"identifiers\":[{\"name\":\"email\",\"value\":\"${email}\"}]},\"limit\":1}" \ + | python3 -m json.tool 2>/dev/null || echo "(failed to parse)" + echo "" +} + +query_email "user@example.com" "BOTH_OPTED_IN" +# ... add more scenarios +``` + +Run: `TRANSCEND_API_KEY= bash verify_upload.sh` + +Check that for each record: + +- `purposes` array has correct `enabled` values +- Empty source values result in purpose being **absent** (not `enabled: false`) +- `identifiers` include email, firstName, lastName +- `metadata` has correct country values +- `timestamp` matches source CSV +- `decryptionStatus` is `DECRYPTED`, `confirmed` is `true` + +Present results to user and ask them to verify before proceeding. + +## Phase 5: Full Upload + +### 5.1 Key flags for production uploads + +> **Important**: For large uploads (>1M records), dry-run and per-record verification +> dramatically slow uploads. For production runs of this scale, skip `--dryRun` and +> verify via sampling after completion. + +Critical flags: + +- **`--skipExistingRecordCheck`**: ALWAYS use for initial uploads or when the partition is empty/nearly empty. 
Without this, the uploader downloads existing preferences for every identifier in the file to check for conflicts — extremely slow on large datasets. +- **`--skipWorkflowTriggers`**: ALWAYS use for bulk imports. Without this, every record triggers workflows on the Transcend side, adding massive overhead and contributing to rate limiting. Only omit if workflows must fire per-record. +- **`--chunkSizeMB 0`**: Use when files are already pre-chunked to skip auto-chunking. +- **`--concurrency`**: Omit to auto-detect from CPU cores (typically 10-12). Each worker also makes `--uploadConcurrency` (default 75) parallel API requests with `--maxChunkSize` (default 25) records each. Total records in flight = `concurrency × uploadConcurrency × maxChunkSize`. + +### 5.2 Copy config and run + +```bash +cp output/preference-upload-schema.json output/batch_a/ +cp output/preference-upload-schema.json output/batch_b/ + +# Upload batch_a first (earliest timestamps) +pnpm start consent upload-preferences \ + --auth $TRANSCEND_API_KEY \ + --partition \ + --directory ./working//output/batch_a/ \ + --transcendUrl \ + --skipExistingRecordCheck \ + --skipWorkflowTriggers \ + --chunkSizeMB 0 + +# Then batch_b (later timestamps, overwrites where needed) +pnpm start consent upload-preferences \ + --auth $TRANSCEND_API_KEY \ + --partition \ + --directory ./working//output/batch_b/ \ + --transcendUrl \ + --skipExistingRecordCheck \ + --skipWorkflowTriggers \ + --chunkSizeMB 0 +``` + +### 5.3 Monitor progress + +The upload command shows a live dashboard with progress, throughput, and errors. Watch for: + +- **Rate limit retries**: Normal, the CLI handles these automatically +- **Unmapped value errors**: Indicates a value in the data not covered by the config — will hard-error in non-interactive worker mode +- **Network errors**: Transient, retried automatically up to 5 times +- **"Duplicate primary key" warnings**: Indicates duplicate emails within a single chunk file. 
The uploader handles these by uploading both with `___` suffix; the API resolves by timestamp. Harmless but indicates the transform script could improve dedup. + +### 5.4 Resumability + +The upload command writes receipt files. If interrupted, re-running the same command will resume from where it left off, skipping already-uploaded chunks. + +## Phase 6: Error Analysis & Cleanup + +If errors occur during upload: + +1. Check the logs directory for detailed error output: + - `/logs/worker-N.err.log` — per-worker error logs + - `/../receipts/combined-errors.log` — aggregated errors + - `/../receipts/combined-all.log` — full output with stack traces +2. Common issues: + - **Unmapped values**: Add missing mappings to `preference-upload-schema.json` and re-run + - **Unmapped columns**: Ensure all CSV columns are accounted for in the config as either identifiers, purposes, metadata, timestamp, or `columnsToIgnore` + - **Invalid identifiers**: Check for malformed emails in source data + - **Rate limits**: Reduce `--concurrency` or `--uploadConcurrency` and retry +3. Failed chunks can be re-uploaded by re-running the command (receipt-based resumption skips completed chunks) + +## Phase 7: Executive Summary + +After upload completes, produce a summary including: + +```markdown +# Preference Upload Summary — + +## Source Data + +- **Sources**: +- **Exclusion list**: emails filtered +- **Records dropped** (no timestamp): + +## Upload Results + +- **batch_a**: records across chunks +- **batch_b**: records across chunks +- **Total unique emails**: +- **Errors**: (detail any persistent failures) + +## Configuration + +- **Purposes mapped**: +- **Identifiers**: +- **Metadata**: + +## Verification + +- **Test upload**: records verified via dashboard +- **Sample checks**: + +## Timing + +- Transform: ~Xm +- Chunking: ~Xm +- Config generation: ~Xm +- Upload batch_a: ~Xh Xm +- Upload batch_b: ~Xh Xm +``` + +Post the summary as a comment on the associated Linear ticket. 
diff --git a/packages/cli/README.md b/packages/cli/README.md index 233e7161..c7c56835 100644 --- a/packages/cli/README.md +++ b/packages/cli/README.md @@ -30,6 +30,7 @@ A command line interface that allows you to programatically interact with the Tr - [`transcend request cron pull-identifiers`](#transcend-request-cron-pull-identifiers) - [`transcend request cron mark-identifiers-completed`](#transcend-request-cron-mark-identifiers-completed) - [`transcend consent build-xdi-sync-endpoint`](#transcend-consent-build-xdi-sync-endpoint) + - [`transcend consent configure-preference-upload`](#transcend-consent-configure-preference-upload) - [`transcend consent generate-access-tokens`](#transcend-consent-generate-access-tokens) - [`transcend consent pull-consent-metrics`](#transcend-consent-pull-consent-metrics) - [`transcend consent pull-consent-preferences`](#transcend-consent-pull-consent-preferences) @@ -1699,6 +1700,33 @@ transcend consent build-xdi-sync-endpoint \ --transcendUrl=https://api.us.transcend.io ``` +### `transcend consent configure-preference-upload` + +```txt +USAGE + transcend consent configure-preference-upload (--auth value) [--sombraAuth value] [--transcendUrl value] (--directory value) [--schemaFilePath value] (--partition value) + transcend consent configure-preference-upload --help + +Interactively configure the column mapping for preference CSV uploads. + +Scans ALL CSV files in the given directory to discover every column header +and every unique value per column, then walks through an interactive editor +to build the full mapping config (identifiers, ignored columns, timestamp, +purposes/preferences and their value mappings). + +The resulting config JSON is reused by 'upload-preferences' so subsequent +uploads run fully non-interactively. + +FLAGS + --auth The Transcend API key. 
Requires scopes: "View Preference Store Settings", "View Identity Verification Settings" + [--sombraAuth] The Sombra internal key, use for additional authentication when self-hosting Sombra + [--transcendUrl] URL of the Transcend backend. Use https://api.us.transcend.io for US hosting [default = https://api.transcend.io] + --directory Path to the directory of CSV files to scan for column headers and unique values + [--schemaFilePath] Path to the config JSON file. Defaults to /../preference-upload-schema.json + --partition The partition key for the preference store + -h --help Print help information and exit +``` + ### `transcend consent generate-access-tokens` ```txt @@ -2215,33 +2243,53 @@ transcend consent upload-data-flows-from-csv \ ```txt USAGE - transcend consent upload-preferences (--auth value) (--partition value) [--sombraAuth value] [--transcendUrl value] [--file value] [--directory value] [--dryRun] [--skipExistingRecordCheck] [--receiptFileDir value] [--skipWorkflowTriggers] [--forceTriggerWorkflows] [--skipConflictUpdates] [--isSilent] [--attributes value] [--receiptFilepath value] [--concurrency value] + transcend consent upload-preferences (--auth value) (--partition value) [--sombraAuth value] [--transcendUrl value] (--directory value) [--dryRun] [--skipExistingRecordCheck] [--receiptFileDir value] [--schemaFilePath value] [--skipWorkflowTriggers] [--forceTriggerWorkflows] [--skipConflictUpdates] [--isSilent] [--attributes value] [--receiptFilepath value] [--concurrency value] [--uploadConcurrency value] [--maxChunkSize value] [--rateLimitRetryDelay value] [--uploadLogInterval value] [--downloadIdentifierConcurrency value] [--maxRecordsToReceipt value] [--regenerate] [--chunkSizeMB value] [--viewerMode] transcend consent upload-preferences --help Upload preference management data to your Preference Store. -This command prompts you to map the shape of the CSV to the shape of the Transcend API. 
There is no requirement for the shape of the incoming CSV, as the script will handle the mapping process. +Requires a config file (generated by 'configure-preference-upload') that maps +CSV columns to identifiers, purposes, and preferences. If no config exists, +pass --regenerate to run the interactive configure flow first. -The script will also produce a JSON cache file that allows for the mappings to be preserved between runs. +Large files are automatically chunked into smaller pieces (controlled by +--chunkSizeMB) before uploading. + +Parallel preference uploader (Node 22+ ESM/TS) +----------------------------------------------------------------------------- +- Spawns a pool of child *processes* (not threads) to run uploads in parallel. +- Shows a live dashboard in the parent terminal with progress per worker. +- Creates per-worker log files and (optionally) opens OS terminals to tail them. +- Uses the same module as both parent and child; the child mode is toggled + by the presence of a CLI flag ('--as-child'). FLAGS - --auth The Transcend API key. Requires scopes: "Modify User Stored Preferences", "View Managed Consent Database Admin API", "View Preference Store Settings" - --partition The partition key to download consent preferences to - [--sombraAuth] The Sombra internal key, use for additional authentication when self-hosting Sombra - [--transcendUrl] URL of the Transcend backend. Use https://api.us.transcend.io for US hosting [default = https://api.transcend.io] - [--file] Path to the CSV file to load preferences from - [--directory] Path to the directory of CSV files to load preferences from - [--dryRun] Whether to do a dry run only - will write results to receiptFilepath without updating Transcend [default = false] - [--skipExistingRecordCheck] Whether to skip the check for existing records. 
SHOULD ONLY BE USED FOR INITIAL UPLOAD [default = false] - [--receiptFileDir] Directory path where the response receipts should be saved [default = ./receipts] - [--skipWorkflowTriggers] Whether to skip workflow triggers when uploading to preference store [default = false] - [--forceTriggerWorkflows] Whether to force trigger workflows for existing consent records [default = false] - [--skipConflictUpdates] Whether to skip uploading of any records where the preference store and file have a hard conflict [default = false] - [--isSilent/--noIsSilent] Whether to skip sending emails in workflows [default = true] - [--attributes] Attributes to add to any DSR request if created. Comma-separated list of key:value pairs. [default = Tags:transcend-cli,Source:transcend-cli] - [--receiptFilepath] Store resulting, continuing where left off [default = ./preference-management-upload-receipts.json] - [--concurrency] The concurrency to use when uploading in parallel [default = 10] - -h --help Print help information and exit + --auth The Transcend API key. Requires scopes: "Modify User Stored Preferences", "View Managed Consent Database Admin API", "View Preference Store Settings", "View Identity Verification Settings" + --partition The partition key to download consent preferences to + [--sombraAuth] The Sombra internal key, use for additional authentication when self-hosting Sombra + [--transcendUrl] URL of the Transcend backend. Use https://api.us.transcend.io for US hosting [default = https://api.transcend.io] + --directory Path to the directory of CSV files to load preferences from + [--dryRun] Whether to do a dry run only - will write results to receiptFilepath without updating Transcend [default = false] + [--skipExistingRecordCheck] Whether to skip the check for existing records. SHOULD ONLY BE USED FOR INITIAL UPLOAD [default = false] + [--receiptFileDir] Directory path where the response receipts should be saved. 
Defaults to ./receipts if a "file" is provided, or /../receipts if a "directory" is provided. + [--schemaFilePath] The path to where the schema for the file should be saved. If file is provided, it will default to ./-preference-upload-schema.json. If directory is provided, it will default to /../preference-upload-schema.json + [--skipWorkflowTriggers] Whether to skip workflow triggers when uploading to preference store [default = false] + [--forceTriggerWorkflows] Whether to force trigger workflows for existing consent records [default = false] + [--skipConflictUpdates] Whether to skip uploading of any records where the preference store and file have a hard conflict [default = false] + [--isSilent/--noIsSilent] Whether to skip sending emails in workflows [default = true] + [--attributes] Attributes to add to any DSR request if created. Comma-separated list of key:value pairs. [default = Tags:transcend-cli,Source:transcend-cli] + [--receiptFilepath] Store resulting receipts, continuing where left off [default = ./preference-management-upload-receipts.json] + [--concurrency] The number of concurrent processes to use to upload the files. When this is not set, it defaults to the number of CPU cores available on the machine. e.g. if there are 5 concurrent processes for 15 files, each parallel job would get 3 files to process. + [--uploadConcurrency] When uploading preferences to v1/preferences - this is the number of concurrent requests made at any given time by a single process. This is NOT the batch size—it's how many batch *tasks* run in parallel. The number of total concurrent requests is maxed out at concurrency * uploadConcurrency. [default = 75] + [--maxChunkSize] When uploading preferences to v1/preferences - this is the maximum number of records to put in a single request. The number of total concurrent records being put in at any one time is maxed out at maxChunkSize * concurrency * uploadConcurrency. 
[default = 25] + [--rateLimitRetryDelay] When uploading preferences to v1/preferences - this is the number of milliseconds to wait before retrying a request that was rate limited. This is only used if the request is rate limited by the Transcend API. If the request fails for any other reason, it will not be retried. [default = 3000] + [--uploadLogInterval] When uploading preferences to v1/preferences - this is the number of records after which to log progress. Output will be logged to console and also to the receipt file. Setting this value lower will allow for you to more easily pick up where you left off. Setting this value higher can avoid excessive i/o operations slowing down the upload. Default is a good optimization for most cases. [default = 1000] + [--downloadIdentifierConcurrency] When downloading identifiers for the upload - this is the number of concurrent requests to make. This is only used if the records are not already cached in the preference store. [default = 30] + [--maxRecordsToReceipt] When writing out successful and pending records to the receipt file - this is the maximum number of records to write out. This is to avoid the receipt file getting too large for JSON.parse/stringify. [default = 10] + [--regenerate] Force re-generation of the schema config file before uploading. Runs the interactive configure flow even if a config already exists. [default = false] + [--chunkSizeMB] Auto-chunk threshold in MB. Any CSV file larger than this will be split into smaller files before uploading. Set to 0 to disable. [default = 11] + [--viewerMode] Run in non-interactive viewer mode (no attach UI, auto-artifacts) [default = false] + -h --help Print help information and exit ``` A sample CSV can be found [here](./examples/cli-upload-preferences-example.csv). In this example, `Sales` and `Marketing` are two custom Purposes, and `SalesCommunications` and `MarketingCommunications` are Preference Topics. 
During the interactive CLI prompt, you can map these columns to the slugs stored in Transcend! @@ -2253,7 +2301,7 @@ A sample CSV can be found [here](./examples/cli-upload-preferences-example.csv). ```sh transcend consent upload-preferences \ --auth="$TRANSCEND_API_KEY" \ - --file=./preferences.csv \ + --directory=./examples/pm-test \ --partition=4d1c5daa-90b7-4d18-aa40-f86a43d2c726 ``` @@ -2263,7 +2311,7 @@ transcend consent upload-preferences \ transcend consent upload-preferences \ --auth="$TRANSCEND_API_KEY" \ --partition=4d1c5daa-90b7-4d18-aa40-f86a43d2c726 \ - --file=./preferences.csv \ + --directory=./examples/pm-test \ --dryRun \ --skipWorkflowTriggers \ --skipConflictUpdates \ @@ -2278,7 +2326,7 @@ transcend consent upload-preferences \ transcend consent upload-preferences \ --auth="$TRANSCEND_API_KEY" \ --partition=4d1c5daa-90b7-4d18-aa40-f86a43d2c726 \ - --file=./preferences.csv \ + --directory=./examples/pm-test \ --transcendUrl=https://api.us.transcend.io ``` diff --git a/packages/cli/package.json b/packages/cli/package.json index c5838502..1819a232 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -78,6 +78,7 @@ "check-exports": "attw --pack . 
--ignore-rules cjs-resolves-to-esm", "script:transcend-json-schema": "tsx scripts/buildTranscendJsonSchema.ts && oxfmt --write ./schema/transcend-yml-schema-*.json", "script:pathfinder-json-schema": "tsx scripts/buildPathfinderJsonSchema.ts && oxfmt --write ./schema/pathfinder-policy-yml-schema.json", + "script:reconcile-preference-records": "tsx scripts/reconcile-preference-records.ts", "docgen": "tsx scripts/buildReadmeDocs.ts", "genfiles": "pnpm script:transcend-json-schema && pnpm script:pathfinder-json-schema && pnpm docgen" }, diff --git a/packages/cli/scripts/reconcile-preference-records.ts b/packages/cli/scripts/reconcile-preference-records.ts new file mode 100644 index 00000000..0916b0ae --- /dev/null +++ b/packages/cli/scripts/reconcile-preference-records.ts @@ -0,0 +1,846 @@ +#!/usr/bin/env node +/* eslint-disable max-len */ +/* eslint-disable jsdoc/require-description,jsdoc/require-returns,jsdoc/require-param-description,@typescript-eslint/no-explicit-any,max-lines,no-continue,no-loop-func,no-param-reassign */ + +import fs from 'node:fs'; +import path from 'node:path'; + +import { decodeCodec } from '@transcend-io/type-utils'; +import Bluebird from 'bluebird'; +import cliProgress from 'cli-progress'; +import colors from 'colors'; +import type { Options as CsvParseOptions } from 'csv-parse'; +import { parse as parseCsvSync } from 'csv-parse/sync'; +import type { Got } from 'got'; +import * as t from 'io-ts'; +import { chunk, uniqBy } from 'lodash-es'; + +import { createSombraGotInstance } from '../src/lib/graphql/index.js'; +import { getPreferencesForIdentifiers } from '../src/lib/preference-management/index.js'; +import { logger } from '../src/logger.js'; +// import { extractErrorMessage } from './lib/helpers'; + +const { map } = Bluebird; + +/** + * + */ +type Identifier = { + /** */ name: string /** */; + /** */ + value: string; +}; + +/** + * + */ +type PreferenceRecord = { + /** */ + identifiers?: Identifier[]; + /** */ + purposes?: Array<{ + /** */ 
+ purpose: string; + /** */ + enabled: boolean; + /** */ + preferences?: Array<{ + /** */ + topic?: string; + /** */ + choice?: unknown; + }>; + }>; + [k: string]: unknown; +}; + +/** + * + */ +type Options = { + /** */ + in: string; + /** */ + partition: string; + /** */ + batchSize: number; + /** */ + downloadLogInterval: number; + /** */ + transcendUrl: string; + /** */ + transcendApiKey: string; + /** */ + sombraApiKey?: string; +}; + +/** + * + * @param pathToFile + * @param codec + * @param options + */ +export function readCsv( + pathToFile: string, + codec: T, + options: CsvParseOptions = {}, +): t.TypeOf[] { + const fileContent = parseCsvSync(fs.readFileSync(pathToFile, 'utf-8'), { + columns: true, + relax_column_count: true, + relax_quotes: true, + skip_empty_lines: true, + trim: true, + ...options, + }); + + const data = decodeCodec(t.array(codec), fileContent); + + const parsed = data.map((datum) => + Object.entries(datum).reduce( + (acc, [key, value]) => + Object.assign(acc, { + [key.replace(/[^a-z_.+\-A-Z -~]/g, '')]: value, + }), + {} as any, + ), + ); + + return parsed as any; +} + +const OutRowCodec = t.intersection([ + t.type({ + personID: t.string, + transcendID: t.string, + email_withheld: t.string, + }), + t.record(t.string, t.unknown), +]); + +/** + * + */ +type OutRow = t.TypeOf; + +/** + * + */ +type RowMetrics = { + /** */ + lookupBy: 'transcendID' | 'personID'; + /** */ + lookupValue: string; + + /** Total records AFTER unique-by-fingerprint */ + totalRecords: number; + + /** Total records BEFORE unique-by-fingerprint */ + totalRecordsRaw: number; + + /** */ + email: string; + /** */ + emailCount: number; + /** */ + multiEmail: boolean; + + /** */ + distinctVariants: number; + /** */ + largestVariantCount: number; + /** */ + identicalRecordCount: number; + /** */ + allIdentical: boolean; + + /** */ + isDuplicateRow: boolean; + /** */ + dupOfRowIndex: number; + + /** JSON dump of RAW records (before unique) */ + recordsJson: string; + /** 
*/ + recordsJsonTruncated: boolean; + + /** */ + runAttempted: boolean; + /** */ + runUpdated: boolean; + /** */ + runUpdateIdentifier: 'transcendID' | 'email' | ''; + /** */ + runError: string; +}; + +/** + * + * @param record + */ +function fingerprintRecord(record: PreferenceRecord): string { + const ids = (record.identifiers ?? []) + .filter((x) => x?.name && x?.value) + .map((x) => ({ name: String(x.name), value: String(x.value) })) + .sort((a, b) => + a.name === b.name ? a.value.localeCompare(b.value) : a.name.localeCompare(b.name), + ); + + const purposes = (record.purposes ?? []) + .map((p) => { + const prefs = (p.preferences ?? []) + .map((pr) => ({ topic: pr.topic ?? '', choice: pr.choice ?? null })) + .sort((a, b) => String(a.topic).localeCompare(String(b.topic))); + + return { purpose: p.purpose, enabled: !!p.enabled, preferences: prefs }; + }) + .sort((a, b) => String(a.purpose).localeCompare(String(b.purpose))); + + return JSON.stringify({ identifiers: ids, purposes }); +} + +/** + * + * @param records + */ +function getUniqueEmails(records: PreferenceRecord[]): string[] { + const set = new Set(); + for (const r of records) { + for (const id of r.identifiers ?? []) { + if (id?.name === 'email' && id.value) { + const v = String(id.value).trim(); + if (v) set.add(v); + } + } + } + return Array.from(set).sort((a, b) => a.localeCompare(b)); +} + +/** + * Build lookup map with duplicates preserved. + * + * @param records + */ +function buildLookupMapAll(records: PreferenceRecord[]): Map { + const m = new Map(); + for (const r of records) { + for (const id of r.identifiers ?? []) { + if (!id?.name || !id?.value) continue; + const k = `${id.name}:${id.value}`; + const arr = m.get(k) ?? 
[]; + arr.push(r); + m.set(k, arr); + } + } + return m; +} + +/** + * + * @param records + */ +function uniqueByFingerprint(records: PreferenceRecord[]): PreferenceRecord[] { + const seen = new Set(); + const out: PreferenceRecord[] = []; + for (const r of records) { + const fp = fingerprintRecord(r); + if (seen.has(fp)) continue; + seen.add(fp); + out.push(r); + } + return out; +} + +/** + * + * @param records + */ +function countRawVsUniqueByFingerprint(records: PreferenceRecord[]): { + /** */ + raw: number; + /** */ + unique: number; +} { + const seen = new Set(); + for (const r of records) seen.add(fingerprintRecord(r)); + return { raw: records.length, unique: seen.size }; +} + +/** + * Fetch preferences for NON-shared identifiers (transcendID/email) in a batch, + * caching raw results PER identifier value. + * + * IMPORTANT: Cache stores RAW arrays (not deduped). + * + * @param sombra + * @param opts + */ +async function getPreferencesForIdentifiersCachedRaw( + sombra: Got, + opts: { + /** */ + identifiers: Identifier[]; + /** */ + partitionKey: string; + /** */ + logInterval: number; + /** */ + cache: Map; + /** */ + counters: { + /** */ hit: number /** */; + /** */ + miss: number; + }; + }, +): Promise { + if (opts.identifiers.length === 0) return []; + + const toFetch: Identifier[] = []; + const fromCache: PreferenceRecord[] = []; + + for (const id of opts.identifiers) { + const k = `${id.name}:${id.value}`; + const cached = opts.cache.get(k); + if (cached) { + opts.counters.hit += 1; + fromCache.push(...cached); + } else { + opts.counters.miss += 1; + toFetch.push(id); + } + } + + if (toFetch.length === 0) return fromCache; + + const fetched = (await getPreferencesForIdentifiers(sombra, { + identifiers: toFetch, + partitionKey: opts.partitionKey, + concurrency: 50, + logInterval: opts.logInterval, + skipLogging: true, + })) as any as PreferenceRecord[]; + + // Populate cache PER identifier by scanning returned records. 
+ // This preserves duplicates in the cached arrays. + const lookupFetched = buildLookupMapAll(fetched); + for (const id of toFetch) { + const k = `${id.name}:${id.value}`; + opts.cache.set(k, lookupFetched.get(k) ?? []); + } + + return [...fromCache, ...fetched]; +} + +/** + * Shared identifiers (personID) must be queried one-by-one. + * Cache stores RAW arrays per identifier. + * + * @param sombra + * @param opts + */ +async function getPreferencesForSharedIdentifiersOneByOneCachedRaw( + sombra: Got, + opts: { + /** */ + identifiers: Identifier[]; + /** */ + partitionKey: string; + /** */ + cache: Map; + /** */ + counters: { + /** */ hit: number /** */; + /** */ + miss: number; + }; + }, +): Promise { + if (opts.identifiers.length === 0) return []; + + const results = await map( + opts.identifiers, + async (identifier) => { + const cacheKey = `${identifier.name}:${identifier.value}`; + const cached = opts.cache.get(cacheKey); + if (cached) { + opts.counters.hit += 1; + return cached; + } + opts.counters.miss += 1; + + const recs = (await getPreferencesForIdentifiers(sombra, { + identifiers: [identifier], + partitionKey: opts.partitionKey, + concurrency: 1, + logInterval: 999999999, + skipLogging: true, + })) as any as PreferenceRecord[]; + + opts.cache.set(cacheKey, recs); + return recs; + }, + { concurrency: 25 }, + ); + + return results.flat(); +} + +/** + * + * @param sombra + * @param args + */ +async function putIdentifierOnly( + sombra: Got, + args: { + /** */ partition: string /** */; + /** */ + identifier: Identifier; + }, +): Promise { + try { + await sombra + .put('v1/preferences', { + json: { + records: [ + { + timestamp: new Date(Date.now() - 365 * 24 * 60 * 60 * 1000).toISOString(), + partition: args.partition, + identifiers: [args.identifier], + }, + ], + skipWorkflowTriggers: true, + }, + }) + .json(); + } catch (e) { + throw new Error(`Failed to put identifier: ${e?.response?.body}`); + } +} + +/** + * + * @param v + */ +function csvEscape(v: 
string): string { + const s = v ?? ''; + if (/[",\n\r]/.test(s)) return `"${s.replace(/"/g, '""')}"`; + return s; +} + +/** + * + * @param n + */ +function ms(n: number): string { + if (n < 1000) return `${n}ms`; + return `${(n / 1000).toFixed(2)}s`; +} + +/** + * + */ +async function main(): Promise { + const inputCsv = process.env.INPUT_CSV; + const opts: Options = { + in: path.resolve(inputCsv ?? ''), + partition: process.env.PARTITION ?? '', + batchSize: Number(process.env.BATCH_SIZE ?? '500'), + downloadLogInterval: Number(process.env.DOWNLOAD_LOG_INTERVAL ?? '100'), + transcendUrl: process.env.TRANSCEND_URL ?? '', + transcendApiKey: process.env.TRANSCEND_API_KEY ?? '', + sombraApiKey: process.env.SOMBRA_API_KEY, + }; + + const runEnabled = String(process.env.RUN ?? '').toLowerCase() === 'true'; + const maxJsonChars = Number(process.env.MAX_JSON_CHARS ?? '50000'); + + if (!inputCsv || !opts.partition || !opts.transcendUrl || !opts.transcendApiKey) { + throw new Error( + 'Missing one or more required environment variables: INPUT_CSV, PARTITION, TRANSCEND_URL, TRANSCEND_API_KEY.', + ); + } + + const t0 = Date.now(); + + logger.info(colors.green(`Reading CSV: ${opts.in}`)); + const rows = readCsv(opts.in, OutRowCodec, { columns: true }) as OutRow[]; + + const rawFile = fs.readFileSync(opts.in, 'utf-8'); + const headerLine = rawFile.split(/\r?\n/)[0] ?? 
'personID,transcendID,email_withheld'; + + const extraHeaders = [ + 'lookupBy', + 'lookupValue', + 'totalRecordsRaw', + 'totalRecords', + 'distinctVariants', + 'largestVariantCount', + 'identicalRecordCount', + 'allIdentical', + 'email', + 'emailCount', + 'multiEmail', + 'isDuplicateRow', + 'dupOfRowIndex', + 'recordsJsonTruncated', + 'recordsJson', + 'runAttempted', + 'runUpdated', + 'runUpdateIdentifier', + 'runError', + ]; + + logger.info(colors.green('Creating Sombra client...')); + const sombra = await createSombraGotInstance( + opts.transcendUrl, + opts.transcendApiKey, + opts.sombraApiKey, + ); + + // RAW caches (per identifier value) + const personIdCache = new Map(); + const transcendIdCache = new Map(); + const emailCache = new Map(); + + // duplicate-row tracking + const rowSeen = new Map(); + + const outTmp = `${opts.in}.tmp`; + const writer = fs.createWriteStream(outTmp, { encoding: 'utf8' }); + writer.write(`${headerLine},${extraHeaders.join(',')}\n`); + + const batches = chunk(rows, opts.batchSize); + logger.info( + colors.magenta( + `Processing ${rows.length} rows in ${batches.length} batches (batchSize=${opts.batchSize}) RUN=${runEnabled}`, + ), + ); + + const progressBar = new cliProgress.SingleBar( + { + format: `Rows |${colors.cyan( + '{bar}', + )}| {value}/{total} | {percentage}% | ETA {eta}s | batch {batch}/{batches}`, + }, + cliProgress.Presets.shades_classic, + ); + progressBar.start(rows.length, 0, { batch: 0, batches: batches.length }); + + let processed = 0; + let written = 0; + let dupRows = 0; + + for (let batchIndex = 0; batchIndex < batches.length; batchIndex += 1) { + const batch = batches[batchIndex]; + const batchT0 = Date.now(); + + progressBar.update(processed, { + batch: batchIndex + 1, + batches: batches.length, + }); + + // counters for cache efficiency + const cTrans = { hit: 0, miss: 0 }; + const cEmail = { hit: 0, miss: 0 }; + const cPerson = { hit: 0, miss: 0 }; + + const stage0 = Date.now(); + const transcendIDs: 
Identifier[] = uniqBy( + batch + .map((r) => { + const v = String((r as any).transcendID ?? '').trim(); + return v ? ({ name: 'transcendID', value: v } as Identifier) : null; + }) + .filter(Boolean) as Identifier[], + (x) => `${x.name}:${x.value}`, + ); + + const personIDs: Identifier[] = uniqBy( + batch + .map((r) => { + const v = String((r as any).personID ?? '').trim(); + return v ? ({ name: 'personID', value: v } as Identifier) : null; + }) + .filter(Boolean) as Identifier[], + (x) => `${x.name}:${x.value}`, + ); + + const emails: Identifier[] = uniqBy( + batch + .map((r) => { + const v = String((r as any).email ?? '').trim(); + return v && v.includes('@') ? ({ name: 'email', value: v } as Identifier) : null; + }) + .filter(Boolean) as Identifier[], + (x) => `${x.name}:${x.value}`, + ); + + const idBuildMs = Date.now() - stage0; + + // FETCH RAW (no dedupe here) + const stage1 = Date.now(); + const [recordsByTranscendRaw, recordsByPersonRaw, recordsByEmailRaw] = await Promise.all([ + getPreferencesForIdentifiersCachedRaw(sombra, { + identifiers: transcendIDs, + partitionKey: opts.partition, + logInterval: opts.downloadLogInterval, + cache: transcendIdCache, + counters: cTrans, + }), + getPreferencesForSharedIdentifiersOneByOneCachedRaw(sombra, { + identifiers: personIDs, + partitionKey: opts.partition, + cache: personIdCache, + counters: cPerson, + }), + getPreferencesForIdentifiersCachedRaw(sombra, { + identifiers: emails, + partitionKey: opts.partition, + logInterval: opts.downloadLogInterval, + cache: emailCache, + counters: cEmail, + }), + ]); + const fetchMs = Date.now() - stage1; + + // Batch-level “how many dupes” metrics (raw vs unique-by-fingerprint) + const tCounts = countRawVsUniqueByFingerprint(recordsByTranscendRaw); + const pCounts = countRawVsUniqueByFingerprint(recordsByPersonRaw); + const eCounts = countRawVsUniqueByFingerprint(recordsByEmailRaw); + + const allRaw = [ + ...(recordsByTranscendRaw as PreferenceRecord[]), + ...(recordsByPersonRaw 
as PreferenceRecord[]), + ...(recordsByEmailRaw as PreferenceRecord[]), + ]; + const allCounts = countRawVsUniqueByFingerprint(allRaw); + + // Lookup map MUST preserve duplicates so per-row raw length is meaningful + const lookupMapRaw = buildLookupMapAll(allRaw); + + // ROW PROCESSING + const stage2 = Date.now(); + const results = await map( + batch, + async (r, idxInBatch) => { + const rowIndex = batchIndex * opts.batchSize + idxInBatch + 1; + + const personID = String((r as any).personID ?? '').trim(); + const transcendID = String((r as any).transcendID ?? '').trim(); + const emailWithheld = String((r as any).email_withheld ?? '').trim(); + const email = String((r as any).email ?? '').trim(); + + const lookupBy: 'transcendID' | 'personID' = transcendID ? 'transcendID' : 'personID'; + const lookupValue = transcendID || personID || ''; + + const rowKey = `${personID}||${transcendID}||${emailWithheld}`; + const firstSeenAt = rowSeen.get(rowKey) ?? 0; + const isDup = firstSeenAt > 0; + if (!isDup) rowSeen.set(rowKey, rowIndex); + else dupRows += 1; + + const metrics: RowMetrics = { + lookupBy, + lookupValue, + totalRecordsRaw: 0, + totalRecords: 0, + + email: '', + emailCount: 0, + multiEmail: false, + + distinctVariants: 0, + largestVariantCount: 0, + identicalRecordCount: 0, + allIdentical: false, + + isDuplicateRow: isDup, + dupOfRowIndex: isDup ? firstSeenAt : 0, + + recordsJson: '', + recordsJsonTruncated: false, + + runAttempted: false, + runUpdated: false, + runUpdateIdentifier: '', + runError: '', + }; + + if (!lookupValue) { + metrics.runError = 'Missing both transcendID and personID'; + return { personID, transcendID, emailWithheld, metrics }; + } + + // RAW matches (duplicates preserved) + const recsRaw: PreferenceRecord[] = [ + ...(transcendID ? (lookupMapRaw.get(`transcendID:${transcendID}`) ?? []) : []), + // ...(personID ? lookupMapRaw.get(`personID:${personID}`) ?? [] : []), + ...(email && email.includes('@') ? (lookupMapRaw.get(`email:${email}`) ?? 
[]) : []), + ]; + + metrics.totalRecordsRaw = recsRaw.length; + + // Unique-by-fingerprint (this is where you measure “how many dupes”) + const recsUnique = uniqueByFingerprint(recsRaw); + metrics.totalRecords = recsUnique.length; + + // email source of truth from UNIQUE set + const emailsFound = getUniqueEmails(recsUnique); + metrics.emailCount = emailsFound.length; + metrics.multiEmail = emailsFound.length > 1; + metrics.email = emailsFound.length === 1 ? emailsFound[0] : ''; + + // variant stats on UNIQUE set + if (recsUnique.length > 0) { + metrics.distinctVariants = recsUnique.length; // because recsUnique is unique by fingerprint + metrics.largestVariantCount = 1; + metrics.identicalRecordCount = 1; + metrics.allIdentical = recsUnique.length === 1; + } + + // JSON dump of RAW records (pre-unique) + let json = ''; + try { + json = JSON.stringify(recsRaw); + } catch (e: any) { + json = JSON.stringify({ + error: 'Failed to stringify recsRaw', + message: e?.message ?? String(e), + }); + } + if (maxJsonChars > 0 && json.length > maxJsonChars) { + metrics.recordsJsonTruncated = true; + metrics.recordsJson = json.slice(0, maxJsonChars); + } else { + metrics.recordsJson = json; + } + + if (runEnabled) { + metrics.runAttempted = true; + try { + if (transcendID) { + await putIdentifierOnly(sombra, { + partition: opts.partition, + identifier: { name: 'transcendID', value: transcendID }, + }); + metrics.runUpdated = true; + metrics.runUpdateIdentifier = 'transcendID'; + } else if (emailsFound.length === 1) { + await putIdentifierOnly(sombra, { + partition: opts.partition, + identifier: { name: 'email', value: emailsFound[0] }, + }); + metrics.runUpdated = true; + metrics.runUpdateIdentifier = 'email'; + } else if (emailsFound.length === 0) { + metrics.runError = + 'RUN enabled but no transcendID and no email found in existing records'; + } else { + metrics.runError = `RUN enabled but multiple emails found (${emailsFound.length})`; + } + } catch (err: any) { + 
metrics.runError = err?.message ?? String(err); + } + if (metrics.runError) { + logger.warn(colors.yellow(`Row ${rowIndex} update error: ${metrics.runError}`)); + } + } + + return { personID, transcendID, emailWithheld, metrics }; + }, + { concurrency: 50 }, + ); + const processMs = Date.now() - stage2; + + // WRITE (always write 1 output row per input row) + const stage3 = Date.now(); + for (const { personID, transcendID, emailWithheld, metrics } of results as any) { + writer.write( + `${[ + csvEscape(personID), + csvEscape(transcendID), + csvEscape(emailWithheld), + + csvEscape(metrics.lookupBy), + csvEscape(metrics.lookupValue), + csvEscape(String(metrics.totalRecordsRaw)), + csvEscape(String(metrics.totalRecords)), + csvEscape(String(metrics.distinctVariants)), + csvEscape(String(metrics.largestVariantCount)), + csvEscape(String(metrics.identicalRecordCount)), + csvEscape(String(metrics.allIdentical)), + + csvEscape(metrics.email), + csvEscape(String(metrics.emailCount)), + csvEscape(String(metrics.multiEmail)), + + csvEscape(String(metrics.isDuplicateRow)), + csvEscape(String(metrics.dupOfRowIndex)), + + csvEscape(String(metrics.recordsJsonTruncated)), + csvEscape(metrics.recordsJson), + + csvEscape(String(metrics.runAttempted)), + csvEscape(String(metrics.runUpdated)), + csvEscape(metrics.runUpdateIdentifier), + csvEscape(metrics.runError), + ].join(',')}\n`, + ); + written += 1; + } + const writeMs = Date.now() - stage3; + + processed += batch.length; + progressBar.update(processed, { + batch: batchIndex + 1, + batches: batches.length, + }); + + const batchMs = Date.now() - batchT0; + + // This is the key log you want: raw vs unique before any dedupe. 
+ logger.info( + colors.green( + `Batch ${batchIndex + 1}/${batches.length} rows=${ + batch.length + } written=${written} dupRows=${dupRows} ` + + `| ids: tID=${transcendIDs.length} email=${emails.length} personID=${personIDs.length} ` + + `| fetched(raw/uniq): tID=${tCounts.raw}/${tCounts.unique} ` + + `email=${eCounts.raw}/${eCounts.unique} personID=${pCounts.raw}/${pCounts.unique} ` + + `ALL=${allCounts.raw}/${allCounts.unique} ` + + `| cache(hit/miss): tID=${cTrans.hit}/${cTrans.miss} email=${cEmail.hit}/${cEmail.miss} personID=${cPerson.hit}/${cPerson.miss} ` + + `| timing: build=${ms(idBuildMs)} fetch=${ms(fetchMs)} process=${ms( + processMs, + )} write=${ms(writeMs)} total=${ms(batchMs)}`, + ), + ); + } + + progressBar.update(rows.length); + progressBar.stop(); + + await new Promise((resolve, reject) => { + writer.end(() => resolve()); + writer.on('error', reject); + }); + + fs.renameSync(outTmp, opts.in); + + const totalMs = Date.now() - t0; + logger.info( + colors.magenta( + `Done. Wrote ${written}/${rows.length} rows (dupRows=${dupRows}) to "${ + opts.in + }" in ${ms(totalMs)}.`, + ), + ); +} + +main().catch((err) => { + logger.error(colors.red(err?.stack ?? 
String(err))); + process.exit(1); +}); +/* eslint-enable jsdoc/require-description,jsdoc/require-returns,jsdoc/require-param-description,@typescript-eslint/no-explicit-any,max-lines,no-continue,no-loop-func,no-param-reassign */ +/* eslint-enable max-len */ diff --git a/packages/cli/src/commands/consent/configure-preference-upload/command.ts b/packages/cli/src/commands/consent/configure-preference-upload/command.ts new file mode 100644 index 00000000..adf51901 --- /dev/null +++ b/packages/cli/src/commands/consent/configure-preference-upload/command.ts @@ -0,0 +1,53 @@ +import { buildCommand } from '@stricli/core'; +import { ScopeName } from '@transcend-io/privacy-types'; + +import { + createAuthParameter, + createSombraAuthParameter, + createTranscendUrlParameter, +} from '../../../lib/cli/common-parameters.js'; + +export const configurePreferenceUploadCommand = buildCommand({ + loader: async () => { + const { configurePreferenceUpload } = await import('./impl.js'); + return configurePreferenceUpload; + }, + parameters: { + flags: { + auth: createAuthParameter({ + scopes: [ScopeName.ViewPreferenceStoreSettings, ScopeName.ViewRequestIdentitySettings], + }), + sombraAuth: createSombraAuthParameter(), + transcendUrl: createTranscendUrlParameter(), + directory: { + kind: 'parsed', + parse: String, + brief: 'Path to the directory of CSV files to scan for column headers and unique values', + }, + schemaFilePath: { + kind: 'parsed', + parse: String, + brief: + 'Path to the config JSON file. Defaults to /../preference-upload-schema.json', + optional: true, + }, + partition: { + kind: 'parsed', + parse: String, + brief: 'The partition key for the preference store', + }, + }, + }, + docs: { + brief: 'Interactively configure the column mapping for preference CSV uploads', + fullDescription: `Interactively configure the column mapping for preference CSV uploads. 
+ +Scans ALL CSV files in the given directory to discover every column header +and every unique value per column, then walks through an interactive editor +to build the full mapping config (identifiers, ignored columns, timestamp, +purposes/preferences and their value mappings). + +The resulting config JSON is reused by 'upload-preferences' so subsequent +uploads run fully non-interactively.`, + }, +}); diff --git a/packages/cli/src/commands/consent/configure-preference-upload/impl.ts b/packages/cli/src/commands/consent/configure-preference-upload/impl.ts new file mode 100644 index 00000000..1c051446 --- /dev/null +++ b/packages/cli/src/commands/consent/configure-preference-upload/impl.ts @@ -0,0 +1,363 @@ +import { createReadStream } from 'node:fs'; + +import { PersistedState } from '@transcend-io/persisted-state'; +import colors from 'colors'; +import { parse as csvParse } from 'csv-parse'; +import inquirer from 'inquirer'; +import * as t from 'io-ts'; + +import type { LocalContext } from '../../../context.js'; +import { doneInputValidation } from '../../../lib/cli/done-input-validation.js'; +import { buildTranscendGraphQLClient } from '../../../lib/graphql/index.js'; +import { collectCsvFilesOrExit } from '../../../lib/helpers/collectCsvFilesOrExit.js'; +import { + FileFormatState, + parsePreferenceIdentifiersFromCsv, + parsePreferenceFileFormatFromCsv, + parsePreferenceAndPurposeValuesFromCsv, +} from '../../../lib/preference-management/index.js'; +import { readCsv } from '../../../lib/requests/index.js'; +import { logger } from '../../../logger.js'; +import { computeSchemaFile } from '../upload-preferences/artifacts/index.js'; +import { loadReferenceData } from '../upload-preferences/upload/loadReferenceData.js'; + +export interface ConfigurePreferenceUploadFlags { + auth: string; + sombraAuth?: string; + transcendUrl: string; + directory: string; + schemaFilePath?: string; + partition: string; +} + +/** + * Scan a single CSV file and collect its column 
headers plus all unique + * values per column. Uses streaming so large files don't need to be held + * in memory. + * + * @param file - CSV file path to scan + * @returns headers and uniqueValuesByColumn + */ +async function scanOneFile(file: string): Promise<{ + headers: Set; + uniqueValuesByColumn: Record>; +}> { + const headers = new Set(); + const uniqueValuesByColumn: Record> = {}; + + await new Promise((resolve, reject) => { + const parser = createReadStream(file).pipe(csvParse({ columns: true, skip_empty_lines: true })); + parser.on('data', (row: Record) => { + for (const [col, val] of Object.entries(row)) { + headers.add(col); + if (!uniqueValuesByColumn[col]) { + uniqueValuesByColumn[col] = new Set(); + } + const trimmed = (val || '').trim(); + uniqueValuesByColumn[col].add(trimmed); + } + }); + parser.on('end', resolve); + parser.on('error', reject); + }); + + return { headers, uniqueValuesByColumn }; +} + +const SCAN_CONCURRENCY = 25; + +async function scanCsvFiles(files: string[]): Promise<{ + /** Union of all column headers */ + headers: string[]; + /** Map of column name to its unique values (trimmed, non-empty) */ + uniqueValuesByColumn: Record>; +}> { + const allHeaders = new Set(); + const merged: Record> = {}; + let completed = 0; + + const queue = [...files]; + const run = async (): Promise => { + while (queue.length > 0) { + const file = queue.shift()!; + const result = await scanOneFile(file); + for (const h of result.headers) allHeaders.add(h); + for (const [col, vals] of Object.entries(result.uniqueValuesByColumn)) { + if (!merged[col]) merged[col] = new Set(); + for (const v of vals) merged[col].add(v); + } + completed += 1; + if (completed % 25 === 0 || completed === files.length) { + logger.info(colors.green(` Scanned ${completed}/${files.length} files...`)); + } + } + }; + + const workers = Array.from({ length: Math.min(SCAN_CONCURRENCY, files.length) }, () => run()); + await Promise.all(workers); + + return { headers: [...allHeaders], 
uniqueValuesByColumn: merged }; +} + +/** + * Build synthetic preference rows from the scanned unique values so + * the existing parse functions see every value at least once. + * + * Row count is driven only by `enumColumns` (purpose/preference columns) + * whose unique values actually matter for mapping. High-cardinality + * columns like timestamps or emails are filled with a single sample value. + * + * @param headers - all column headers + * @param uniqueValuesByColumn - unique values per column + * @param enumColumns - columns whose full unique values must be represented + * @returns synthetic rows covering all unique enum values + */ +function buildSyntheticRows( + headers: string[], + uniqueValuesByColumn: Record>, + enumColumns: string[] = [], +): Record[] { + const enumSet = new Set(enumColumns); + const maxRows = Math.max(1, ...enumColumns.map((h) => uniqueValuesByColumn[h]?.size ?? 0)); + const rows: Record[] = []; + for (let i = 0; i < maxRows; i += 1) { + const row: Record = {}; + for (const h of headers) { + const vals = uniqueValuesByColumn[h] ? [...uniqueValuesByColumn[h]] : ['']; + row[h] = enumSet.has(h) ? (vals[i % vals.length] ?? '') : (vals[0] ?? ''); + } + rows.push(row); + } + return rows; +} + +/** + * Interactively configure the column mapping for preference CSV uploads. + * + * Scans ALL CSV files in a directory, discovers every header and unique value, + * then walks the user through mapping identifiers, timestamps, + * purpose/preference value mappings, and metadata columns. + * Saves the result as a reusable config. 
+ * + * @param flags - CLI flags + */ +export async function configurePreferenceUpload( + this: LocalContext, + flags: ConfigurePreferenceUploadFlags, +): Promise { + const { auth, transcendUrl, directory, schemaFilePath } = flags; + + const files = collectCsvFilesOrExit(directory, this); + doneInputValidation(this.process.exit); + + logger.info( + colors.green(`Scanning ${files.length} CSV file(s) for headers and unique values...`), + ); + + // 1) Scan all files to discover the full column/value universe + const { headers, uniqueValuesByColumn } = await scanCsvFiles(files); + logger.info(colors.green(`Discovered ${headers.length} columns across all files.`)); + + // 2) Fetch org reference data + const client = buildTranscendGraphQLClient(transcendUrl, auth); + const { purposes, preferenceTopics, identifiers } = await loadReferenceData(client); + + const allIdentifierNames = identifiers.map((id) => id.name); + logger.info( + colors.green( + `Loaded ${purposes.length} purposes, ${preferenceTopics.length} preference topics, ${identifiers.length} identifiers from org.`, + ), + ); + + // 3) Create or load persisted schema state + const schemaFile = computeSchemaFile(schemaFilePath, directory, files[0]); + const initial = { + columnToPurposeName: {}, + lastFetchedAt: new Date().toISOString(), + columnToIdentifier: {}, + } as const; + const schemaState = new PersistedState(schemaFile, FileFormatState, initial); + + // 4) Interactive: select identifier columns + logger.info(colors.green('\n[Step 1/6] Identifier column selection...')); + const existingIdentifierCols = Object.keys(schemaState.getValue('columnToIdentifier')); + let identifierColumns: string[]; + if (existingIdentifierCols.length > 0) { + logger.info( + colors.magenta(`Existing identifier columns: ${existingIdentifierCols.join(', ')}`), + ); + const { reuse } = await inquirer.prompt<{ reuse: boolean }>([ + { + name: 'reuse', + type: 'confirm', + message: `Keep existing identifier column selection? 
(${existingIdentifierCols.join( + ', ', + )})`, + default: true, + }, + ]); + identifierColumns = reuse + ? existingIdentifierCols + : ( + await inquirer.prompt<{ cols: string[] }>([ + { + name: 'cols', + type: 'checkbox', + message: 'Select columns that are identifiers', + choices: headers, + validate: (v: string[]) => v.length > 0 || 'Select at least one identifier column', + }, + ]) + ).cols; + } else { + identifierColumns = ( + await inquirer.prompt<{ cols: string[] }>([ + { + name: 'cols', + type: 'checkbox', + message: 'Select columns that are identifiers', + choices: headers, + validate: (v: string[]) => v.length > 0 || 'Select at least one identifier column', + }, + ]) + ).cols; + } + + // 5) Map identifier columns to org identifier names + logger.info( + colors.green(`\n[Step 2/6] Identifier name mapping (validating sample: ${files[0]})...`), + ); + const sampleRows = readCsv(files[0], t.record(t.string, t.string)); + await parsePreferenceIdentifiersFromCsv(sampleRows, { + schemaState, + orgIdentifiers: identifiers, + allowedIdentifierNames: allIdentifierNames, + identifierColumns, + }); + + const identifierCols = Object.keys(schemaState.getValue('columnToIdentifier')); + + // 6) Select timestamp column (only needs column names, not full rows) + logger.info(colors.green('\n[Step 3/6] Timestamp column selection...')); + const timestampChoices = headers.filter((h) => !identifierCols.includes(h)); + await parsePreferenceFileFormatFromCsv( + [ + Object.fromEntries( + timestampChoices.map((h) => [h, [...(uniqueValuesByColumn[h] ?? [])][0] ?? '']), + ), + ], + schemaState, + ); + + // 7) Select which remaining columns map to purposes/preferences + logger.info(colors.green('\n[Step 4/6] Purpose/preference column selection...')); + const timestampCol = schemaState.getValue('timestampColumn'); + const mappedSoFar = [...identifierCols, ...(timestampCol ? 
[timestampCol] : [])]; + const remainingColumns = headers.filter((h) => !mappedSoFar.includes(h)); + + const { purposeColumns } = await inquirer.prompt<{ + purposeColumns: string[]; + }>([ + { + name: 'purposeColumns', + type: 'checkbox', + message: 'Select columns that map to purposes/preferences', + choices: remainingColumns, + validate: (v: string[]) => v.length > 0 || 'Select at least one purpose column', + }, + ]); + + const nonPurposeColumns = remainingColumns.filter((h) => !purposeColumns.includes(h)); + + // 8) Build synthetic rows driven ONLY by purpose column unique values + logger.info(colors.green('\n[Step 5/6] Mapping purpose values...')); + const syntheticRows = buildSyntheticRows(headers, uniqueValuesByColumn, purposeColumns); + logger.info( + colors.green( + ` Built ${syntheticRows.length} synthetic rows ` + + `(from ${purposeColumns.length} purpose columns).`, + ), + ); + + // 9) Map purpose columns to org purposes + value mappings + await parsePreferenceAndPurposeValuesFromCsv(syntheticRows, schemaState, { + purposeSlugs: purposes.map((p) => p.trackingType), + preferenceTopics, + forceTriggerWorkflows: false, + columnsToIgnore: nonPurposeColumns, + }); + + // 10) Metadata: select which remaining columns to INCLUDE as metadata + logger.info(colors.green('\n[Step 6/6] Metadata column selection...')); + if (nonPurposeColumns.length > 0) { + logger.info( + colors.magenta('\nRemaining unmapped columns:\n' + ` ${nonPurposeColumns.join(', ')}\n`), + ); + + const { metadataColumns } = await inquirer.prompt<{ + metadataColumns: string[]; + }>([ + { + name: 'metadataColumns', + type: 'checkbox', + message: 'Select columns to INCLUDE as metadata ' + '(unselected columns will be ignored)', + choices: nonPurposeColumns, + }, + ]); + + const ignored = nonPurposeColumns.filter((c) => !metadataColumns.includes(c)); + + if (ignored.length > 0) { + schemaState.setValue(ignored, 'columnsToIgnore'); + } + + if (metadataColumns.length > 0) { + const columnToMetadata: 
Record = {}; + for (const col of metadataColumns) { + columnToMetadata[col] = { key: col }; + } + schemaState.setValue(columnToMetadata, 'columnToMetadata'); + } + + logger.info( + colors.green( + ` Metadata: ${metadataColumns.length > 0 ? metadataColumns.join(', ') : '(none)'}`, + ), + ); + logger.info(colors.green(` Ignored: ${ignored.length > 0 ? ignored.join(', ') : '(none)'}`)); + } + + // 11) Validate completeness + const purposeCols = Object.keys(schemaState.getValue('columnToPurposeName')); + const ignoredCols = schemaState.getValue('columnsToIgnore') ?? []; + const metadataCols = Object.keys(schemaState.getValue('columnToMetadata') ?? {}); + const allMapped = new Set([ + ...identifierCols, + ...purposeCols, + ...ignoredCols, + ...metadataCols, + ...(timestampCol ? [timestampCol] : []), + ]); + const unmapped = headers.filter((h) => !allMapped.has(h)); + if (unmapped.length > 0) { + logger.warn( + colors.yellow( + `Warning: the following columns are not mapped: ${unmapped.join(', ')}. ` + + 'They will cause errors during upload. 
Re-run this command to fix.', + ), + ); + } + + schemaState.setValue(new Date().toISOString(), 'lastFetchedAt'); + + logger.info(colors.green(`\nConfiguration saved to: ${schemaFile}`)); + logger.info( + colors.green( + ` Identifiers: ${identifierCols.join(', ')}\n` + + ` Timestamp: ${timestampCol || '(none)'}\n` + + ` Purpose columns: ${purposeCols.join(', ')}\n` + + ` Metadata: ${metadataCols.join(', ') || '(none)'}\n` + + ` Ignored: ${ignoredCols.join(', ') || '(none)'}`, + ), + ); +} diff --git a/packages/cli/src/commands/consent/routes.ts b/packages/cli/src/commands/consent/routes.ts index bd24e3d4..b3a8d816 100644 --- a/packages/cli/src/commands/consent/routes.ts +++ b/packages/cli/src/commands/consent/routes.ts @@ -1,6 +1,7 @@ import { buildRouteMap } from '@stricli/core'; import { buildXdiSyncEndpointCommand } from './build-xdi-sync-endpoint/command.js'; +import { configurePreferenceUploadCommand } from './configure-preference-upload/command.js'; import { deletePreferenceRecordsCommand } from './delete-preference-records/command.js'; import { generateAccessTokensCommand } from './generate-access-tokens/command.js'; import { pullConsentMetricsCommand } from './pull-consent-metrics/command.js'; @@ -14,6 +15,7 @@ import { uploadPreferencesCommand } from './upload-preferences/command.js'; export const consentRoutes = buildRouteMap({ routes: { 'build-xdi-sync-endpoint': buildXdiSyncEndpointCommand, + 'configure-preference-upload': configurePreferenceUploadCommand, 'generate-access-tokens': generateAccessTokensCommand, 'pull-consent-metrics': pullConsentMetricsCommand, 'pull-consent-preferences': pullConsentPreferencesCommand, diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/index.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/index.ts index bf2ebba3..c51a1158 100644 --- a/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/index.ts +++ 
b/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/index.ts
@@ -1,4 +1,5 @@
 export * from './readFailingUpdatesFromReceipt.js';
 export * from './summarizeReceipt.js';
+export * from './receiptsState.js';
 export * from './resolveReceiptPath.js';
 export * from './applyReceiptSummary.js';
diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/receiptsState.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/receiptsState.ts
new file mode 100644
index 00000000..abd5c735
--- /dev/null
+++ b/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/receiptsState.ts
@@ -0,0 +1,147 @@
+import { PersistedState } from '@transcend-io/persisted-state';
+
+import { retrySamePromise, type RetryPolicy } from '../../../../../lib/helpers/retrySamePromise.js';
+import {
+  RequestUploadReceipts,
+  type FailingPreferenceUpdates,
+  type PendingSafePreferenceUpdates,
+  type PendingWithConflictPreferenceUpdates,
+  type PreferenceUpdateMap,
+  type SkippedPreferenceUpdates,
+} from '../../../../../lib/preference-management/index.js';
+
+export type PreferenceReceiptsInterface = {
+  /** Path to file */
+  receiptsFilepath: string;
+  /**
+   * Get the successfully updated records
+   */
+  getSuccessful(): PreferenceUpdateMap;
+  /**
+   * Get the records pending upload
+   */
+  getPending(): PreferenceUpdateMap;
+  /**
+   * Get the failing to upload records
+   */
+  getFailing(): FailingPreferenceUpdates;
+  /**
+   * Set the new map of successful records
+   */
+  setSuccessful(next: PreferenceUpdateMap): Promise<void>;
+  /**
+   * Set the new map of pending records
+   */
+  setPending(next: PreferenceUpdateMap): Promise<void>;
+  /**
+   * Set the new map of safe to upload records
+   */
+  setPendingSafe(next: PendingSafePreferenceUpdates): Promise<void>;
+  /**
+   * Set the skipped records
+   */
+  setSkipped(next: SkippedPreferenceUpdates): Promise<void>;
+  /**
+   * Set the new map of conflict upload records
+   */
+  setPendingConflict(next: 
PendingWithConflictPreferenceUpdates): Promise<void>;
+  /**
+   * Set the new map of failing records
+   */
+  setFailing(next: FailingPreferenceUpdates): Promise<void>;
+  /**
+   * Reset the pending records
+   */
+  resetPending(): Promise<void>;
+};
+
+/**
+ * Build a receipts state adapter for the given file path.
+ *
+ * Retries creation of the underlying PersistedState with **exponential backoff**
+ * when the receipts file cannot be parsed due to a transient write (e.g., empty
+ * or partially written file) indicated by "Unexpected end of JSON input".
+ *
+ * @param filepath - Where to persist/read upload receipts
+ * @returns Receipt state port with strongly-named methods
+ */
+export async function makeReceiptsState(filepath: string): Promise<PreferenceReceiptsInterface> {
+  // Initial shape if file does not exist or is empty.
+  const initial = {
+    failingUpdates: {},
+    pendingConflictUpdates: {},
+    skippedUpdates: {},
+    pendingSafeUpdates: {},
+    successfulUpdates: {},
+    pendingUpdates: {},
+    lastFetchedAt: new Date().toISOString(),
+  } as const;
+
+  // Retry policy: only retry on the specific JSON truncation message.
+  const policy: RetryPolicy = {
+    maxAttempts: 10,
+    delayMs: 500, // start small and backoff
+    shouldRetry: (_status, message) =>
+      typeof message === 'string' && /Unexpected end of JSON input/i.test(message ?? ''),
+  };
+
+  // Exponential backoff cap to avoid unbounded waits.
+  const MAX_DELAY_MS = 5_000;
+
+  try {
+    const s = await retrySamePromise(
+      async () => {
+        // Wrap constructor in a Promise so thrown sync errors reject properly. 
+ const result = await Promise.resolve( + new PersistedState(filepath, RequestUploadReceipts, initial), + ); + return result; + }, + policy, + // eslint-disable-next-line @typescript-eslint/no-unused-vars + (_note) => { + // Double the delay on each backoff (cap at MAX_DELAY_MS) + policy.delayMs = Math.min(MAX_DELAY_MS, Math.max(1, policy.delayMs * 2)); + // Optional local diagnostics: + // process.stderr.write(`[receiptsState] ${_note}; next delay=${policy.delayMs}ms\n`); + }, + ); + + return { + receiptsFilepath: filepath, + getSuccessful: () => s.getValue('successfulUpdates'), + getPending: () => s.getValue('pendingUpdates'), + getFailing: () => s.getValue('failingUpdates'), + async setSuccessful(v: PreferenceUpdateMap) { + await s.setValue(v, 'successfulUpdates'); + }, + async setSkipped(v: SkippedPreferenceUpdates) { + await s.setValue(v, 'skippedUpdates'); + }, + async setPending(v: PreferenceUpdateMap) { + await s.setValue(v, 'pendingUpdates'); + }, + async setPendingSafe(v: PendingSafePreferenceUpdates) { + await s.setValue(v, 'pendingSafeUpdates'); + }, + async setPendingConflict(v: PendingWithConflictPreferenceUpdates) { + await s.setValue(v, 'pendingConflictUpdates'); + }, + async setFailing(v: FailingPreferenceUpdates) { + await s.setValue(v, 'failingUpdates'); + }, + async resetPending() { + await s.setValue({}, 'pendingUpdates'); + await s.setValue({}, 'pendingSafeUpdates'); + await s.setValue({}, 'skippedUpdates'); + await s.setValue({}, 'pendingConflictUpdates'); + }, + }; + } catch (error) { + throw new Error( + `Failed to create receipts state for ${filepath}: ${ + error instanceof Error ? 
error.message : String(error) + }`, + ); + } +} diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/tests/receiptsState.test.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/tests/receiptsState.test.ts new file mode 100644 index 00000000..1dbd8209 --- /dev/null +++ b/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/tests/receiptsState.test.ts @@ -0,0 +1,53 @@ +import { mkdtemp } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +import { describe, expect, it } from 'vitest'; + +import { makeReceiptsState } from '../receiptsState.js'; + +describe('makeReceiptsState', () => { + it('creates an empty receipts store and persists updates', async () => { + const tempDir = await mkdtemp(join(tmpdir(), 'cli-receipts-state-')); + const receiptsFilepath = join(tempDir, 'receipts.json'); + + const receipts = await makeReceiptsState(receiptsFilepath); + + expect(receipts.receiptsFilepath).toBe(receiptsFilepath); + expect(receipts.getPending()).toEqual({}); + expect(receipts.getSuccessful()).toEqual({}); + expect(receipts.getFailing()).toEqual({}); + + await receipts.setPending({ + row1: true, + }); + await receipts.setSuccessful({ + row2: true, + }); + await receipts.setFailing({ + row3: { + uploadedAt: '2025-08-07T00:00:00.000Z', + error: 'boom', + update: { + partition: 'partition-1', + timestamp: '2025-08-07T00:00:00.000Z', + identifiers: [{ name: 'email', value: 'test@example.com' }], + }, + }, + }); + + expect(receipts.getPending()).toEqual({ row1: true }); + expect(receipts.getSuccessful()).toEqual({ row2: true }); + expect(receipts.getFailing()).toEqual({ + row3: { + uploadedAt: '2025-08-07T00:00:00.000Z', + error: 'boom', + update: { + partition: 'partition-1', + timestamp: '2025-08-07T00:00:00.000Z', + identifiers: [{ name: 'email', value: 'test@example.com' }], + }, + }, + }); + }); +}); diff --git 
a/packages/cli/src/commands/consent/upload-preferences/buildTaskOptions.ts b/packages/cli/src/commands/consent/upload-preferences/buildTaskOptions.ts new file mode 100644 index 00000000..c7d96050 --- /dev/null +++ b/packages/cli/src/commands/consent/upload-preferences/buildTaskOptions.ts @@ -0,0 +1,86 @@ +// helpers/buildCommon.ts +import type { UploadPreferencesCommandFlags } from './impl.js'; + +/** Common options shared by upload tasks */ +export type TaskCommonOpts = Pick< + UploadPreferencesCommandFlags, + | 'auth' + | 'partition' + | 'sombraAuth' + | 'directory' + | 'transcendUrl' + | 'skipConflictUpdates' + | 'uploadConcurrency' + | 'uploadLogInterval' + | 'maxChunkSize' + | 'downloadIdentifierConcurrency' + | 'rateLimitRetryDelay' + | 'maxRecordsToReceipt' + | 'skipWorkflowTriggers' + | 'skipExistingRecordCheck' + | 'isSilent' + | 'dryRun' + | 'attributes' + | 'forceTriggerWorkflows' +> & { + schemaFile: string; + receiptsFolder: string; +}; + +/** + * Copy the options from the main command over to the spawned tasks + * + * @param flags - All flags + * @param schemaFile - Schema file + * @param receiptsFolder - Receipts folder + * @returns Common task options + */ +export function buildCommonOpts( + flags: UploadPreferencesCommandFlags, + schemaFile: string, + receiptsFolder: string, +): TaskCommonOpts { + const { + auth, + directory, + sombraAuth, + partition, + transcendUrl, + downloadIdentifierConcurrency, + skipConflictUpdates, + skipWorkflowTriggers, + skipExistingRecordCheck, + isSilent, + dryRun, + attributes, + forceTriggerWorkflows, + uploadConcurrency, + maxChunkSize, + rateLimitRetryDelay, + maxRecordsToReceipt, + uploadLogInterval, + } = flags; + + return { + schemaFile, + receiptsFolder, + auth, + directory, + downloadIdentifierConcurrency, + sombraAuth, + partition, + transcendUrl, + skipConflictUpdates, + skipWorkflowTriggers, + skipExistingRecordCheck, + isSilent, + dryRun, + attributes, + forceTriggerWorkflows, + uploadConcurrency, + 
maxChunkSize,
+    rateLimitRetryDelay,
+    maxRecordsToReceipt,
+    uploadLogInterval,
+  };
+}
diff --git a/packages/cli/src/commands/consent/upload-preferences/command.ts b/packages/cli/src/commands/consent/upload-preferences/command.ts
index b2c83395..43ebab15 100644
--- a/packages/cli/src/commands/consent/upload-preferences/command.ts
+++ b/packages/cli/src/commands/consent/upload-preferences/command.ts
@@ -19,6 +19,7 @@ export const uploadPreferencesCommand = buildCommand({
       ScopeName.ManageStoredPreferences,
       ScopeName.ViewManagedConsentDatabaseAdminApi,
       ScopeName.ViewPreferenceStoreSettings,
+      ScopeName.ViewRequestIdentitySettings,
     ],
   }),
   partition: {
@@ -28,17 +29,10 @@
   },
   sombraAuth: createSombraAuthParameter(),
   transcendUrl: createTranscendUrlParameter(),
-    file: {
-      kind: 'parsed',
-      parse: String,
-      brief: 'Path to the CSV file to load preferences from',
-      optional: true,
-    },
     directory: {
       kind: 'parsed',
       parse: String,
       brief: 'Path to the directory of CSV files to load preferences from',
-      optional: true,
     },
     dryRun: {
       kind: 'boolean',
@@ -55,8 +49,17 @@
     receiptFileDir: {
       kind: 'parsed',
       parse: String,
-      brief: 'Directory path where the response receipts should be saved',
-      default: './receipts',
+      brief:
+        'Directory path where the response receipts should be saved. Defaults to ./receipts if a "file" is provided, or <directory>/../receipts if a "directory" is provided.',
+      optional: true,
+    },
+    schemaFilePath: {
+      kind: 'parsed',
+      parse: String,
+      brief:
+        'The path to where the schema for the file should be saved. 
If file is provided, it will default to ./<file-name>-preference-upload-schema.json ' +
+        'If directory is provided, it will default to <directory>/../preference-upload-schema.json',
+      optional: true,
+    },
     skipWorkflowTriggers: {
       kind: 'boolean',
@@ -95,17 +98,104 @@
     concurrency: {
       kind: 'parsed',
       parse: numberParser,
-      brief: 'The concurrency to use when uploading in parallel',
+      brief:
+        'The number of concurrent processes to use to upload the files. When this is not set, it defaults ' +
+        'to the number of CPU cores available on the machine. ' +
+        'e.g. if there are 5 concurrent processes for 15 files, each parallel job would get 3 files to process. ',
+      optional: true,
+    },
+    uploadConcurrency: {
+      kind: 'parsed',
+      parse: numberParser,
+      brief:
+        'When uploading preferences to v1/preferences - this is the number of concurrent requests made at any given time by a single process. ' +
+        "This is NOT the batch size—it's how many batch *tasks* run in parallel. " +
+        'The number of total concurrent requests is maxed out at concurrency * uploadConcurrency.',
+      default: '25',
+    },
+    maxChunkSize: {
+      kind: 'parsed',
+      parse: numberParser,
+      brief:
+        'When uploading preferences to v1/preferences - this is the maximum number of records to put in a single request. ' +
+        'The number of total concurrent records being put in at any one time is maxed out at maxChunkSize * concurrency * uploadConcurrency.',
+      default: '25',
+    },
+    rateLimitRetryDelay: {
+      kind: 'parsed',
+      parse: numberParser,
+      brief:
+        'When uploading preferences to v1/preferences - this is the number of milliseconds to wait before retrying a request that was rate limited. ' +
+        'This is only used if the request is rate limited by the Transcend API. ' +
+        'If the request fails for any other reason, it will not be retried. 
', + default: '3000', + }, + uploadLogInterval: { + kind: 'parsed', + parse: numberParser, + brief: + 'When uploading preferences to v1/preferences - this is the number of records after which to log progress. ' + + 'Output will be logged to console and also to the receipt file. ' + + 'Setting this value lower will allow for you to more easily pick up where you left off. ' + + 'Setting this value higher can avoid excessive i/o operations slowing down the upload. ' + + 'Default is a good optimization for most cases.', + default: '1000', + }, + downloadIdentifierConcurrency: { + kind: 'parsed', + parse: numberParser, + brief: + 'When downloading identifiers for the upload - this is the number of concurrent requests to make. ' + + 'This is only used if the records are not already cached in the preference store. ', + default: '30', + }, + maxRecordsToReceipt: { + kind: 'parsed', + parse: numberParser, + brief: + 'When writing out successful and pending records to the receipt file - this is the maximum number of records to write out. ' + + 'This is to avoid the receipt file getting too large for JSON.parse/stringify.', default: '10', }, + regenerate: { + kind: 'boolean', + brief: + 'Force re-generation of the schema config file before uploading. ' + + 'Runs the interactive configure flow even if a config already exists.', + default: false, + }, + chunkSizeMB: { + kind: 'parsed', + parse: numberParser, + brief: + 'Auto-chunk threshold in MB. Any CSV file larger than this will be ' + + 'split into smaller files before uploading. Set to 0 to disable.', + default: '11', + }, + viewerMode: { + kind: 'boolean', + brief: 'Run in non-interactive viewer mode (no attach UI, auto-artifacts)', + default: false, + }, }, }, docs: { brief: 'Upload preference management data to your Preference Store', fullDescription: `Upload preference management data to your Preference Store. -This command prompts you to map the shape of the CSV to the shape of the Transcend API. 
There is no requirement for the shape of the incoming CSV, as the script will handle the mapping process. +Requires a config file (generated by 'configure-preference-upload') that maps +CSV columns to identifiers, purposes, and preferences. If no config exists, +pass --regenerate to run the interactive configure flow first. + +Large files are automatically chunked into smaller pieces (controlled by +--chunkSizeMB) before uploading. -The script will also produce a JSON cache file that allows for the mappings to be preserved between runs.`, +Parallel preference uploader (Node 22+ ESM/TS) +----------------------------------------------------------------------------- +- Spawns a pool of child *processes* (not threads) to run uploads in parallel. +- Shows a live dashboard in the parent terminal with progress per worker. +- Creates per-worker log files and (optionally) opens OS terminals to tail them. +- Uses the same module as both parent and child; the child mode is toggled + by the presence of a CLI flag ('--as-child').`, }, }); diff --git a/packages/cli/src/commands/consent/upload-preferences/impl.ts b/packages/cli/src/commands/consent/upload-preferences/impl.ts index ca9eb4f3..50932196 100644 --- a/packages/cli/src/commands/consent/upload-preferences/impl.ts +++ b/packages/cli/src/commands/consent/upload-preferences/impl.ts @@ -1,103 +1,194 @@ -import { readdirSync } from 'node:fs'; -import { basename, join } from 'node:path'; +import { statSync, existsSync } from 'node:fs'; +import { join } from 'node:path'; import colors from 'colors'; import type { LocalContext } from '../../../context.js'; -import { map } from '../../../lib/bluebird.js'; import { doneInputValidation } from '../../../lib/cli/done-input-validation.js'; -import { uploadPreferenceManagementPreferencesInteractive } from '../../../lib/preference-management/index.js'; -import { splitCsvToList } from '../../../lib/requests/index.js'; +import { chunkOneCsvFile } from 
'../../../lib/helpers/chunkOneCsvFile.js'; +import { collectCsvFilesOrExit } from '../../../lib/helpers/collectCsvFilesOrExit.js'; +import { + computePoolSize, + CHILD_FLAG, + type PoolHooks, + runPool, + dashboardPlugin, + buildExportStatus, + createExtraKeyHandler, +} from '../../../lib/pooling/index.js'; import { logger } from '../../../logger.js'; +import { + computeReceiptsFolder, + computeSchemaFile, + ExportManager, + writeFailingUpdatesCsv, + type FailingUpdateRow, +} from './artifacts/index.js'; +import { applyReceiptSummary } from './artifacts/receipts/index.js'; +import { buildCommonOpts } from './buildTaskOptions.js'; +import { + AnyTotals, + isUploadModeTotals, + isCheckModeTotals, + uploadPreferencesPlugin, +} from './ui/index.js'; +import { runChild } from './worker.js'; + +/** + * A unit of work: instructs a worker to upload (or check) a single CSV file. + */ +export type UploadPreferencesTask = { + /** Absolute path of the CSV file to process. */ + filePath: string; + /** Command/worker options shared across tasks (built from CLI flags). */ + options: ReturnType; +}; + +/** + * Per-worker progress snapshot emitted by the worker. + * This mirrors the previous IPC progress payload for this command. + */ +export type UploadPreferencesProgress = { + /** File currently being processed. */ + filePath: string; + /** New successes since the last progress message (used to compute rates). */ + successDelta?: number; + /** Cumulative successes so far for the current file. */ + successTotal?: number; + /** Optional total row count for the file (if known). */ + fileTotal?: number; +}; + +/** + * Final result for a single file. + */ +export type UploadPreferencesResult = { + /** Success flag for the file. */ + ok: boolean; + /** File this result pertains to. */ + filePath: string; + /** Optional path to the worker-generated receipt file. */ + receiptFilepath?: string; + /** Optional error string when `ok === false`. 
*/ + error?: string; +}; + +/** + * Aggregate totals shown in the dashboard. + * This command supports two modes: + * - upload mode totals + * - check mode totals + * + * The union is already defined in `./ui` as `AnyTotals`. + */ +type Totals = AnyTotals; + +/** + * Returns the current module's path so the worker pool knows what file to re-exec. + * In Node ESM, __filename is undefined, so we fall back to argv[1]. + * + * @returns The current module's path as a string + */ +function getCurrentModulePath(): string { + if (typeof __filename !== 'undefined') { + return __filename as unknown as string; + } + return process.argv[1]; +} export interface UploadPreferencesCommandFlags { auth: string; partition: string; sombraAuth?: string; transcendUrl: string; - file?: string; - directory?: string; + directory: string; dryRun: boolean; skipExistingRecordCheck: boolean; - receiptFileDir: string; + receiptFileDir?: string; + schemaFilePath?: string; skipWorkflowTriggers: boolean; forceTriggerWorkflows: boolean; skipConflictUpdates: boolean; isSilent: boolean; attributes: string; receiptFilepath: string; - concurrency: number; + concurrency?: number; + uploadConcurrency: number; + maxChunkSize: number; + rateLimitRetryDelay: number; + uploadLogInterval: number; + downloadIdentifierConcurrency: number; + maxRecordsToReceipt: number; + regenerate: boolean; + chunkSizeMB: number; + viewerMode: boolean; } +/** + * Parent entrypoint for uploading/checking many preference CSVs in parallel. + * + * Flow: + * 1) Validate inputs & discover CSV files (exit if none). + * 2) Compute pool size from `--concurrency` or CPU heuristic. + * 3) Build `common` worker options and task queue (one task per file). + * 4) Define `PoolHooks` for task scheduling, progress, and results aggregation. + * 5) Launch the pool with `runPool`, rendering via `dashboardPlugin(uploadPreferencesPlugin)`. 
+ * + * All log exporting / artifact work that used to be done in “viewer mode” can be handled + * in `postProcess` using the new log context from the runner. + * + * @param flags - CLI options for the run. + * @returns Promise that resolves when the pool completes. + */ export async function uploadPreferences( this: LocalContext, - { + flags: UploadPreferencesCommandFlags, +): Promise { + const { auth, partition, sombraAuth, transcendUrl, - file = '', directory, - dryRun, skipExistingRecordCheck, receiptFileDir, - skipWorkflowTriggers, - forceTriggerWorkflows, - skipConflictUpdates, - isSilent, - attributes, + schemaFilePath, concurrency, - }: UploadPreferencesCommandFlags, -): Promise { - if (!!directory && !!file) { - logger.error( - colors.red('Cannot provide both a directory and a file. Please provide only one.'), - ); - this.process.exit(1); - } - - if (!file && !directory) { - logger.error( - colors.red( - 'A file or directory must be provided. Please provide one using --file=./preferences.csv or --directory=./preferences', - ), - ); - this.process.exit(1); - } + regenerate, + chunkSizeMB, + viewerMode, + } = flags; + /* 1) Validate & find inputs */ + let files = collectCsvFilesOrExit(directory, this); doneInputValidation(this.process.exit); - const files: string[] = []; - - if (directory) { - try { - const filesInDirectory = readdirSync(directory); - const csvFiles = filesInDirectory.filter((file) => file.endsWith('.csv')); - - if (csvFiles.length === 0) { - logger.error(colors.red(`No CSV files found in directory: ${directory}`)); - this.process.exit(1); + /* 1b) Auto-chunk oversized files */ + if (chunkSizeMB > 0) { + const chunkThreshold = chunkSizeMB * 1024 * 1024; + const oversized = files.filter((f) => { + try { + return statSync(f).size > chunkThreshold; + } catch { + return false; } - - // Add full paths for each CSV file - files.push(...csvFiles.map((file) => join(directory, file))); - } catch (err) { - logger.error(colors.red(`Failed to read 
directory: ${directory}`)); - logger.error(colors.red((err as Error).message)); - this.process.exit(1); - } - } else { - try { - // Verify file exists and is a CSV - if (!file.endsWith('.csv')) { - logger.error(colors.red('File must be a CSV file')); - this.process.exit(1); + }); + if (oversized.length > 0) { + logger.info( + colors.yellow(`Auto-chunking ${oversized.length} file(s) exceeding ${chunkSizeMB}MB...`), + ); + for (const file of oversized) { + await chunkOneCsvFile({ + filePath: file, + outputDir: directory, + clearOutputDir: false, + chunkSizeMB, + // eslint-disable-next-line @typescript-eslint/no-empty-function + onProgress: () => {}, + }); } - files.push(file); - } catch (err) { - logger.error(colors.red(`Failed to access file: ${file}`)); - logger.error(colors.red((err as Error).message)); - this.process.exit(1); + // Re-collect after chunking (new chunk files will be in the directory) + files = collectCsvFilesOrExit(directory, this); } } @@ -106,32 +197,195 @@ export async function uploadPreferences( `Processing ${files.length} consent preferences files for partition: ${partition}`, ), ); - logger.debug(`Files to process: ${files.join(', ')}`); + logger.debug( + `Files to process:\n${files.slice(0, 10).join('\n')}\n${ + files.length > 10 ? `... 
and ${files.length - 10} more` : '' + }`, + ); if (skipExistingRecordCheck) { - logger.info(colors.bgYellow(`Skipping existing record check: ${skipExistingRecordCheck}`)); + logger.info(colors.bgYellow('Skipping existing record check: true')); } - await map( - files, - async (filePath) => { - const fileName = basename(filePath).replace('.csv', ''); - await uploadPreferenceManagementPreferencesInteractive({ - receiptFilepath: join(receiptFileDir, `${fileName}-receipts.json`), + const receiptsFolder = computeReceiptsFolder(receiptFileDir, directory); + const schemaFile = computeSchemaFile(schemaFilePath, directory, files[0]); + + /* 1c) Auto-configure if needed */ + const configExists = existsSync(schemaFile); + if (!configExists || regenerate) { + if (!configExists && !regenerate) { + logger.error( + colors.red( + `No config file found at: ${schemaFile}\n` + + "Run 'transcend consent configure-preference-upload' to create one, " + + 'or pass --regenerate to run the interactive setup now.', + ), + ); + this.process.exit(1); + } + if (regenerate) { + logger.info(colors.yellow('Running interactive config generation...')); + const { configurePreferenceUpload } = await import('../configure-preference-upload/impl.js'); + await configurePreferenceUpload.call(this, { auth, sombraAuth, - file: filePath, - partition, transcendUrl, - skipConflictUpdates, - skipWorkflowTriggers, - skipExistingRecordCheck, - isSilent, - dryRun, - attributes: splitCsvToList(attributes), - forceTriggerWorkflows, + directory, + schemaFilePath, + partition, + }); + } + } + + /* 2) Pool size */ + const { poolSize, cpuCount } = computePoolSize(concurrency, files.length); + + /* 3) Build shared worker options and queue */ + const common = buildCommonOpts(flags, schemaFile, receiptsFolder); + + // FIFO queue: one task per file + const queue = files.map((filePath) => ({ + filePath, + options: common, + })); + + // Dashboard artifacts/export status (shown during renders) + // inside uploadPreferences() 
before runPool call: + const exportMgr = new ExportManager(receiptsFolder); + const exportStatus = buildExportStatus(receiptsFolder); + const failingUpdatesMem: FailingUpdateRow[] = []; + + /* 4) Hooks */ + const hooks: PoolHooks< + UploadPreferencesTask, + UploadPreferencesProgress, + UploadPreferencesResult, + Totals + > = { + nextTask: () => queue.shift(), + taskLabel: (t) => t.filePath, + initTotals: () => + !common.dryRun + ? ({ + mode: 'upload', + success: 0, + skipped: 0, + error: 0, + errors: {}, + } as Totals) + : ({ + mode: 'check', + totalPending: 0, + pendingConflicts: 0, + pendingSafe: 0, + skipped: 0, + } as Totals), + initSlotProgress: () => undefined, + onProgress: (totals) => totals, + onResult: (totals, res) => { + applyReceiptSummary({ + receiptsFolder: common.receiptsFolder, + filePath: res.filePath, + receiptFilepath: res.receiptFilepath, + agg: totals, + dryRun: common.dryRun, + failingUpdatesMem, }); + return { totals, ok: !!res.ok }; }, - { concurrency }, - ); + exportStatus: () => exportStatus, + /** + * Finalization after all workers exit. + * With the new runner you also receive: + * - logDir + * - logsBySlot (Map) + * - startedAt / finishedAt + * - getLogPathsForSlot(id) + * - viewerMode (boolean) + * + * @param options - Options with logDir, logsBySlot, startedAt, finishedAt, etc. + */ + postProcess: async ({ totals, logsBySlot }) => { + try { + // Persist failing updates CSV next to receipts/logDir. 
+ const fPath = join(receiptsFolder, 'failing-updates.csv'); + await writeFailingUpdatesCsv(failingUpdatesMem, fPath); + exportStatus.failuresCsv = { + path: fPath, + savedAt: Date.now(), + exported: true, + }; + + // Save logs + await Promise.all([ + exportMgr.exportCombinedLogs(logsBySlot, 'error'), + exportMgr.exportCombinedLogs(logsBySlot, 'warn'), + exportMgr.exportCombinedLogs(logsBySlot, 'info'), + exportMgr.exportCombinedLogs(logsBySlot, 'all'), + ]); + + // Summarize totals to stdout (parity with the old implementation) + if (isUploadModeTotals(totals)) { + logger.info( + colors.green( + `All done. Success:${totals.success.toLocaleString()} ` + + `Skipped:${totals.skipped.toLocaleString()} ` + + `Error:${totals.error.toLocaleString()}`, + ), + ); + } else if (isCheckModeTotals(totals)) { + logger.info( + colors.green( + `All done. Pending:${totals.totalPending.toLocaleString()} ` + + `PendingConflicts:${totals.pendingConflicts.toLocaleString()} ` + + `PendingSafe:${totals.pendingSafe.toLocaleString()} ` + + `Skipped:${totals.skipped.toLocaleString()}`, + ), + ); + } + } catch (err: unknown) { + logger.error(colors.red(`Failed to export artifacts: ${String(err)}`)); + } + }, + }; + + /* 5) Launch the pool runner with our hooks and dashboard plugin. 
*/ + await runPool({ + title: `Upload Preferences - ${directory}`, + baseDir: directory || receiptsFolder || process.cwd(), + childFlag: CHILD_FLAG, + childModulePath: getCurrentModulePath(), + poolSize, + cpuCount, + filesTotal: files.length, + hooks, + viewerMode, + render: (input) => dashboardPlugin(input, uploadPreferencesPlugin), + extraKeyHandler: ({ logsBySlot, repaint, setPaused }) => + createExtraKeyHandler({ + logsBySlot, + repaint, + setPaused, + exportMgr, // enables E/W/I/A + exportStatus, // keeps the exports panel updated + custom: { + F: async ({ noteExport, say }) => { + const fPath = join(receiptsFolder, 'failing-updates.csv'); + await writeFailingUpdatesCsv(failingUpdatesMem, fPath); + say(`\nWrote failing updates CSV to: ${fPath}`); + noteExport('failuresCsv', fPath); + }, + }, + }), + }); +} + +/* ------------------------------------------------------------------------------------------------- + * If invoked directly as a child process, enter worker loop + * ------------------------------------------------------------------------------------------------- */ +if (process.argv.includes(CHILD_FLAG)) { + runChild().catch((err) => { + logger.error(err); + process.exit(1); + }); } diff --git a/packages/cli/src/commands/consent/upload-preferences/readme.ts b/packages/cli/src/commands/consent/upload-preferences/readme.ts index 4a7d2097..9efb5ac7 100644 --- a/packages/cli/src/commands/consent/upload-preferences/readme.ts +++ b/packages/cli/src/commands/consent/upload-preferences/readme.ts @@ -9,7 +9,7 @@ const examples = buildExamples( 'Upload consent preferences to partition key `4d1c5daa-90b7-4d18-aa40-f86a43d2c726`', flags: { auth: '$TRANSCEND_API_KEY', - file: './preferences.csv', + directory: './examples/pm-test', partition: '4d1c5daa-90b7-4d18-aa40-f86a43d2c726', }, }, @@ -18,7 +18,7 @@ const examples = buildExamples( flags: { auth: '$TRANSCEND_API_KEY', partition: '4d1c5daa-90b7-4d18-aa40-f86a43d2c726', - file: './preferences.csv', + directory: 
'./examples/pm-test', dryRun: true, skipWorkflowTriggers: true, skipConflictUpdates: true, @@ -32,7 +32,7 @@ const examples = buildExamples( flags: { auth: '$TRANSCEND_API_KEY', partition: '4d1c5daa-90b7-4d18-aa40-f86a43d2c726', - file: './preferences.csv', + directory: './examples/pm-test', transcendUrl: 'https://api.us.transcend.io', }, }, diff --git a/packages/cli/src/commands/consent/upload-preferences/schemaState.ts b/packages/cli/src/commands/consent/upload-preferences/schemaState.ts new file mode 100644 index 00000000..cf1ea894 --- /dev/null +++ b/packages/cli/src/commands/consent/upload-preferences/schemaState.ts @@ -0,0 +1,89 @@ +import { PersistedState } from '@transcend-io/persisted-state'; + +import { retrySamePromise, type RetryPolicy } from '../../../lib/helpers/retrySamePromise.js'; +import { + FileFormatState, + type ColumnIdentifierMap, + type ColumnMetadataMap, + type ColumnPurposeMap, +} from '../../../lib/preference-management/index.js'; + +export interface PreferenceSchemaInterface { + /** Name of the column used as timestamp, if any */ + getTimestampColumn(): string | undefined; + /** CSV column name -> Purpose/Preference mapping */ + getColumnToPurposeName(): ColumnPurposeMap; + /** CSV column name -> Identifier mapping */ + getColumnToIdentifier(): ColumnIdentifierMap; + /** CSV column name -> Metadata key mapping */ + getColumnToMetadata(): ColumnMetadataMap | undefined; + /** CSV columns to ignore during upload */ + getColumnsToIgnore(): string[]; + /** The persisted cache */ // FIXME remove this + state: PersistedState; +} + +/** + * Build a schema state adapter holding CSV→purpose/identifier mappings. + * + * Retries creation of the underlying PersistedState with **exponential backoff** + * when the cache file cannot be parsed due to a transient write (e.g., empty or + * partially written file) indicated by "Unexpected end of JSON input". 
+ * + * @param filepath - Path to the schema cache file + * @returns Schema state port with strongly-named methods + */ +export async function makeSchemaState(filepath: string): Promise { + // Initial state used if file does not exist or is empty. + const initial = { + columnToPurposeName: {}, + lastFetchedAt: new Date().toISOString(), + columnToIdentifier: {}, + } as const; + + // Retry policy: only retry on the specific JSON truncation message. + const policy: RetryPolicy = { + maxAttempts: 5, + delayMs: 50, // start small + shouldRetry: (_status, message) => + typeof message === 'string' && /Unexpected end of JSON input/i.test(message ?? ''), + }; + + // Exponential backoff with a reasonable cap. + const MAX_DELAY_MS = 2_000; + + try { + const state = await retrySamePromise( + async () => { + // Wrap constructor in a Promise so thrown sync errors reject properly. + const result = await Promise.resolve( + new PersistedState(filepath, FileFormatState, initial), + ); + return result; + }, + policy, + // eslint-disable-next-line @typescript-eslint/no-unused-vars + (note) => { + // Double the delay on each backoff (cap at MAX_DELAY_MS) + policy.delayMs = Math.min(MAX_DELAY_MS, Math.max(1, policy.delayMs * 2)); + // Optional: uncomment for local diagnostics + // process.stderr.write(`[schemaState] ${note}; next delay=${policy.delayMs}ms\n`); + }, + ); + + return { + state, + getTimestampColumn: (): string | undefined => state.getValue('timestampColumn'), + getColumnToPurposeName: (): ColumnPurposeMap => state.getValue('columnToPurposeName'), + getColumnToIdentifier: (): ColumnIdentifierMap => state.getValue('columnToIdentifier'), + getColumnToMetadata: (): ColumnMetadataMap | undefined => state.getValue('columnToMetadata'), + getColumnsToIgnore: (): string[] => state.getValue('columnsToIgnore') ?? [], + }; + } catch (err) { + throw new Error( + `Failed to create schema state from ${filepath}: ${ + err instanceof Error ? 
err.message : String(err) + }`, + ); + } +} diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/batchUploader.ts b/packages/cli/src/commands/consent/upload-preferences/upload/batchUploader.ts index 8ff51413..f44b29bd 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/batchUploader.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/batchUploader.ts @@ -75,6 +75,7 @@ export async function uploadChunkWithSplit( // 2) For retryable statuses, attempt in-place retries without splitting. const isSoftRateLimit = + // FIXME status === 400 && /slow down|please try again shortly|Throughput exceeds the current/i.test(msg); diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts b/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts new file mode 100644 index 00000000..99631fa8 --- /dev/null +++ b/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts @@ -0,0 +1,183 @@ +import colors from 'colors'; +import type { Got } from 'got'; +import type { GraphQLClient } from 'graphql-request'; +import * as t from 'io-ts'; + +import type { FormattedAttribute } from '../../../../lib/graphql/formatAttributeValues.js'; +import { limitRecords } from '../../../../lib/helpers/index.js'; +import type { + FileFormatState, + PendingSafePreferenceUpdates, + PendingWithConflictPreferenceUpdates, + SkippedPreferenceUpdates, +} from '../../../../lib/preference-management/codecs.js'; +import { parsePreferenceManagementCsvWithCache } from '../../../../lib/preference-management/index.js'; +import { parseAttributesFromString, readCsv } from '../../../../lib/requests/index.js'; +import { logger } from '../../../../logger.js'; +import { type PreferenceReceiptsInterface } from '../artifacts/receipts/receiptsState.js'; +import { type PreferenceSchemaInterface } from '../schemaState.js'; +import { loadReferenceData, type 
PreferenceUploadReferenceData } from './loadReferenceData.js'; +import { transformCsv } from './transform/index.js'; +import type { PreferenceUploadProgress } from './types.js'; + +export interface InteractiveUploadPreferencePlan { + /** CSV file path to load preference records from */ + file: string; + /** Partition key used throughout the upload */ + partition: string; + + /** Parsed "workflow attributes" (Key:Value pairs) */ + parsedAttributes: FormattedAttribute[]; + /** Reference data for transforming rows → PreferenceUpdateItem payloads */ + references: PreferenceUploadReferenceData; + /** Result sets derived entirely from validation/pre-processing */ + result: { + pendingSafeUpdates: PendingSafePreferenceUpdates; + pendingConflictUpdates: PendingWithConflictPreferenceUpdates; + skippedUpdates: SkippedPreferenceUpdates; + }; + + /** Snapshot of schema mappings to use during payload building */ + schema: Omit; +} + +/** + * Build an InteractiveUploadPreferencePlan by performing *validation-only* work. + * + * This performs *all pre-processing and validation* up front: + * - Reads the CSV + * - Validates timestamp column and identifier mappings (schema cache) + * - Maps columns to purposes/preferences + * - Loads current consent records (unless skipExistingRecordCheck=true) + * - Computes: pendingSafeUpdates / pendingConflictUpdates / skippedUpdates + * - Seeds the receipts file with snapshots of the pending sets + * + * The returned plan can be passed to `interactivePreferenceUploaderFromPlan` + * to perform the actual upload, keeping responsibilities cleanly separated. 
+ * + * @param opts - Input options required to parse & validate the CSV + * @returns A fully-resolved plan ready to pass to the uploader + */ +export async function buildInteractiveUploadPreferencePlan({ + sombra, + client, + file, + partition, + receipts, + schema, + skipExistingRecordCheck = false, + forceTriggerWorkflows = false, + allowedIdentifierNames, + downloadIdentifierConcurrency = 30, + identifierDownloadLogInterval = 10000, + maxRecordsToReceipt = 50, + identifierColumns, + columnsToIgnore = [], + attributes = [], + nonInteractive = false, + onProgress, +}: { + /** Transcend GraphQL client */ + client: GraphQLClient; + /** Sombra instance to make requests to */ + sombra: Got; + /** CSV file to process */ + file: string; + /** Partition used to scope reads/writes */ + partition: string; + /** Receipts snapshots */ + receipts: PreferenceReceiptsInterface; + /** Schema information */ + schema: PreferenceSchemaInterface; + /** Skip the preflight existing-record check for speed (initial loads only) */ + skipExistingRecordCheck?: boolean; + /** Force workflow triggers; requires existing consent records for all rows */ + forceTriggerWorkflows?: boolean; + /** Concurrency for downloading identifiers */ + downloadIdentifierConcurrency?: number; + /** Allowed identifier names configured for the org/run */ + allowedIdentifierNames: string[]; + /** CSV columns that correspond to identifiers */ + identifierColumns: string[]; + /** CSV columns to ignore entirely */ + columnsToIgnore?: string[]; + /** Extra workflow attributes (pre-parsed Key:Value strings) */ + attributes?: string[]; + /** Interval to log when downloading identifiers */ + identifierDownloadLogInterval?: number; + /** Maximum records to write out to the receipt file */ + maxRecordsToReceipt?: number; + /** When true, throw instead of prompting (for worker processes) */ + nonInteractive?: boolean; + /** on progress callback */ + onProgress?: (info: PreferenceUploadProgress) => void; +}): Promise { + 
const parsedAttributes = parseAttributesFromString(attributes); + + // Informative status about prior runs (resume/diagnostics) + const failing = receipts.getFailing(); + const pending = receipts.getPending(); + logger.info( + colors.magenta( + 'Restored cache:\n' + + `${Object.values(failing).length} failing requests queued for retry\n` + + `${Object.values(pending).length} pending requests to process\n` + + `Processing file: ${file}\n`, + ), + ); + + // Build clients + reference data (purposes/topics/identifiers) + const references = await loadReferenceData(client); + + // Read in the file + logger.info(colors.magenta(`Reading in file: "${file}"`)); + const preferences = transformCsv(readCsv(file, t.record(t.string, t.string))); + logger.info(colors.magenta(`Read in ${preferences.length} rows`)); + + // Parse & validate CSV → derive safe/conflict/skipped sets (no uploading) + const parsed = await parsePreferenceManagementCsvWithCache( + preferences, + { + file, + purposeSlugs: references.purposes.map((x) => x.trackingType), + preferenceTopics: references.preferenceTopics, + sombra, + partitionKey: partition, + skipExistingRecordCheck, + forceTriggerWorkflows, + orgIdentifiers: references.identifiers, + allowedIdentifierNames, + downloadIdentifierConcurrency, + identifierColumns, + identifierDownloadLogInterval, + columnsToIgnore, + onProgress, + nonInteractive, + }, + schema.state, + ); + + // Persist small snapshots of the pending sets into receipts for resumability. + await receipts.setPendingSafe(limitRecords(parsed.pendingSafeUpdates, maxRecordsToReceipt)); + await receipts.setSkipped(parsed.skippedUpdates); + await receipts.setPendingConflict(parsed.pendingConflictUpdates); + + // Return a compact, self-contained plan for the upload stage. 
+ return { + file, + partition, + parsedAttributes, + references, + result: { + pendingSafeUpdates: parsed.pendingSafeUpdates, + pendingConflictUpdates: parsed.pendingConflictUpdates, + skippedUpdates: parsed.skippedUpdates, + }, + schema: { + timestampColumn: schema.getTimestampColumn(), + columnToPurposeName: schema.getColumnToPurposeName(), + columnToIdentifier: schema.getColumnToIdentifier(), + columnToMetadata: schema.getColumnToMetadata(), + }, + }; +} diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/index.ts b/packages/cli/src/commands/consent/upload-preferences/upload/index.ts new file mode 100644 index 00000000..ec3e310f --- /dev/null +++ b/packages/cli/src/commands/consent/upload-preferences/upload/index.ts @@ -0,0 +1,6 @@ +export * from './types.js'; +export * from './loadReferenceData.js'; +export * from './buildInteractiveUploadPlan.js'; +export * from './batchUploader.js'; +export * from './transform/index.js'; +export * from './interactivePreferenceUploaderFromPlan.js'; diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts b/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts new file mode 100644 index 00000000..019e88c7 --- /dev/null +++ b/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts @@ -0,0 +1,343 @@ +import type { PreferenceUpdateItem } from '@transcend-io/privacy-types'; +import Bluebird from 'bluebird'; +/* eslint-disable no-param-reassign */ +import colors from 'colors'; +import type { Got } from 'got'; +import { chunk, groupBy } from 'lodash-es'; + +import { RETRYABLE_BATCH_STATUSES } from '../../../../constants.js'; +import { extractErrorMessage, limitRecords } from '../../../../lib/helpers/index.js'; +import { logger } from '../../../../logger.js'; +import type { PreferenceReceiptsInterface } from '../artifacts/receipts/index.js'; +import { 
uploadChunkWithSplit } from './batchUploader.js'; +import type { InteractiveUploadPreferencePlan } from './buildInteractiveUploadPlan.js'; +import { buildPendingUpdates } from './transform/index.js'; +import type { PreferenceUploadProgress } from './types.js'; + +const { map: pMap } = Bluebird; + +/** + * Execute the upload using a pre-built InteractiveUploadPlan. + * + * This function performs *no CSV parsing or validation*. It: + * - Converts pre-validated safe/conflict sets into PreferenceUpdateItem payloads + * - Batches + uploads with retry/split semantics + * - Writes progress snapshots to receipts + * + * @param plan - Output of `buildInteractiveUploadPlan` + * @param options - Upload-only options (batch size, concurrency, etc.) + */ +export async function interactivePreferenceUploaderFromPlan( + { + partition, + parsedAttributes, + references: { purposes, preferenceTopics }, + result: { pendingSafeUpdates, pendingConflictUpdates }, + schema, + }: InteractiveUploadPreferencePlan, + { + receipts, + sombra, + dryRun = false, + isSilent = true, + skipWorkflowTriggers = false, + skipConflictUpdates = false, + forceTriggerWorkflows = false, + uploadLogInterval = 1_000, + maxChunkSize = 25, + uploadConcurrency = 20, + maxRecordsToReceipt = 50, + onProgress, + }: { + /** Receipts interface */ + receipts: PreferenceReceiptsInterface; + /** Sombra got instance */ + sombra: Got; + /** Compute-only mode: do not PUT; still writes receipts snapshots */ + dryRun?: boolean; + /** Avoid downstream visible notifications */ + isSilent?: boolean; + /** Skip workflow triggers for each update */ + skipWorkflowTriggers?: boolean; + /** Only upload safe updates (ignore conflicts entirely) */ + skipConflictUpdates?: boolean; + /** Force triggering workflows for each update (use sparingly) */ + forceTriggerWorkflows?: boolean; + /** Log/persist cadence for progress updates */ + uploadLogInterval?: number; + /** Max records in a single batch PUT to v1/preferences */ + maxChunkSize?: 
number; + /** Max concurrent batch tasks at once */ + uploadConcurrency?: number; + /** Maximum records to write out to the receipt file */ + maxRecordsToReceipt?: number; + /** on progress callback */ + onProgress?: (info: PreferenceUploadProgress) => void; + }, +): Promise { + // Build final payloads (pure transform; no network) + const pendingUpdates: Record = buildPendingUpdates({ + safe: pendingSafeUpdates, + conflicts: pendingConflictUpdates, + skipConflictUpdates, + timestampColumn: schema.timestampColumn, + columnToPurposeName: schema.columnToPurposeName, + columnToIdentifier: schema.columnToIdentifier, + columnToMetadata: schema.columnToMetadata, + preferenceTopics, + purposes, + partition, + workflowAttrs: parsedAttributes, + isSilent, + skipWorkflowTriggers, + forceTriggerWorkflows, + }); + + // Seed pending uploads into receipts (first 10 expanded to keep file size small) + await receipts.setPending(limitRecords(pendingUpdates, maxRecordsToReceipt)); + + // Dry-run exits before any network calls + if (dryRun) { + logger.info( + colors.green( + `Dry run complete — ${Object.values(pendingUpdates).length} pending updates. ` + + `See receipts file: ${receipts.receiptsFilepath}`, + ), + ); + return; + } + + logger.info( + colors.magenta( + `Uploading ${ + Object.values(pendingUpdates).length + } preferences to partition: ${partition}. 
Concurrency: ${uploadConcurrency}, Max Chunk Size: ${maxChunkSize}` + + `, Max Records to Receipt: ${maxRecordsToReceipt}`, + ), + ); + + const t0 = Date.now(); + let uploadedCount = 0; + + // reset failing + await receipts.setFailing({}); + + // Get successful and filtered entries + const successful = receipts.getSuccessful(); + const allEntries = Object.entries(pendingUpdates) as Array<[string, PreferenceUpdateItem]>; + const filtered = allEntries.filter(([userId]) => !successful[userId]); + const fileTotal = filtered.length; + + onProgress?.({ + successDelta: 0, + successTotal: 0, + fileTotal, + }); + + if (filtered.length === 0) { + logger.warn( + colors.yellow( + `No pending updates to upload (all ${allEntries.length} are already marked successful).`, + ), + ); + await receipts.resetPending(); + return; + } + + if (filtered.length < allEntries.length) { + logger.warn( + colors.yellow( + `Filtered ${allEntries.length - filtered.length} already-successful updates. ` + + `${filtered.length} remain to upload.`, + ), + ); + } + + // Retry policy for "retry in place" statuses + const retryPolicy = { + maxAttempts: 5, + delayMs: 10_000, + shouldRetry: (status?: number) => + // eslint-disable-next-line @typescript-eslint/no-explicit-any + !!status && RETRYABLE_BATCH_STATUSES.has(status as any), + }; + + /** + * Mark a batch as successfully uploaded. Persists progress periodically based on + * `uploadLogInterval` to throttle IO and keep receipts compact. 
+ * + * @param entries - Entries to mark as successful + */ + const markSuccessFor = async (entries: Array<[string, PreferenceUpdateItem]>): Promise => { + const successfulUpdates = receipts.getSuccessful(); + + for (const [userId] of entries) { + successfulUpdates[userId] = true; + delete pendingUpdates[userId]; + // Also keep the safe/conflict mirrors in sync in case of resume + delete pendingSafeUpdates[userId]; + delete pendingConflictUpdates[userId]; + } + uploadedCount += entries.length; + onProgress?.({ + successDelta: entries.length, + successTotal: uploadedCount, + fileTotal, + }); + + const shouldLog = + uploadedCount % uploadLogInterval === 0 || + Math.floor((uploadedCount - entries.length) / uploadLogInterval) < + Math.floor(uploadedCount / uploadLogInterval); + + if (shouldLog) { + logger.info( + colors.green( + `Uploaded ${uploadedCount}/${filtered.length} user preferences to partition ${partition}`, + ), + ); + await receipts.setSuccessful(successfulUpdates); + + await receipts.setPending(limitRecords(pendingUpdates, maxRecordsToReceipt)); + await receipts.setPendingSafe(limitRecords(pendingSafeUpdates, maxRecordsToReceipt)); + await receipts.setPendingConflict(pendingConflictUpdates); + } + }; + + /** + * Mark a single record failure with a concise, actionable error message. + * Mirrors are kept in sync to avoid reprocessing this row on resume. + * + * @param userId - User ID to mark as failed + * @param update - The update item that failed + * @param err - The error that occurred + */ + const markFailureForSingle = async ( + userId: string, + update: PreferenceUpdateItem, + err: unknown, + ): Promise => { + const msg = extractErrorMessage(err); + logger.error( + colors.red(`Failed to upload preferences for ${userId} (partition=${partition}): ${msg}`), + ); + const failing = receipts.getFailing(); + failing[userId] = { + uploadedAt: new Date().toISOString(), + update, + error: msg.includes('Identifier email did not pass validation') + ? 
'Identifier email did not pass validation' + : msg, + }; + + delete pendingUpdates[userId]; + delete pendingSafeUpdates[userId]; + delete pendingConflictUpdates[userId]; + + await receipts.setFailing(failing); + }; + + /** + * Mark an entire batch as failed (used when we exhaust in-place retries for + * retryable statuses). Delegates to the single-failure handler per entry. + * + * @param entries - Entries to mark as failed + * @param err - The error that occurred + */ + const markFailureForBatch = async ( + entries: Array<[string, PreferenceUpdateItem]>, + err: unknown, + ): Promise => { + for (const [userId, update] of entries) { + await markFailureForSingle(userId, update, err); + } + }; + + const { + valid = [], + invalidAt = [], + invalidSlash = [], + } = groupBy(filtered, ([, update]) => + !update.identifiers + ? 'valid' + : update.identifiers.some((id) => id.name === 'email' && !id.value.includes('@')) + ? 'invalidAt' + : update.identifiers.some((id) => id.name === 'email' && id.value.includes('/')) + ? 
'invalidSlash' + : 'valid', + ); + + if (invalidAt.length > 0) { + await markFailureForBatch(invalidAt, new Error('Invalid email format - missing @')); + } + if (invalidSlash.length > 0) { + await markFailureForBatch( + invalidSlash, + new Error('Invalid email format - email contains a slash (/)'), + ); + } + + if (valid.length === 0) { + logger.warn(colors.yellow('No updates to upload after validating emails.')); + await receipts.resetPending(); + return; + } + + // Kick off uploads in chunks; each chunk may be recursively split on errors + const chunks = chunk(valid, maxChunkSize); + await pMap( + chunks, + async (currentChunk) => { + await uploadChunkWithSplit( + currentChunk, + { + // Minimal transport surface for the uploader + putBatch: async (updates, opts) => { + await sombra + .put('v1/preferences', { + json: { + records: updates, + skipWorkflowTriggers: opts.skipWorkflowTriggers, + }, + }) + .json(); + }, + retryPolicy, + options: { skipWorkflowTriggers }, + isRetryableStatus: (s) => + // eslint-disable-next-line @typescript-eslint/no-explicit-any + !!s && RETRYABLE_BATCH_STATUSES.has(s as any), + }, + { + onSuccess: markSuccessFor, + onFailureSingle: ([userId, update], err) => markFailureForSingle(userId, update, err), + onFailureBatch: markFailureForBatch, + }, + ); + }, + { concurrency: uploadConcurrency }, + ); + + // Finalize receipts: persist success map and clear pending mirrors + await receipts.setSuccessful(receipts.getSuccessful()); + await receipts.resetPending(); + + const elapsedSec = (Date.now() - t0) / 1000; + logger.info( + colors.green( + `Successfully uploaded ${Object.keys(receipts.getSuccessful()).length} user preferences ` + + `to partition ${partition} in "${elapsedSec}" seconds!`, + ), + ); + + const remainingFailures = Object.values(receipts.getFailing()).length; + if (remainingFailures > 0) { + logger.error( + colors.red( + `There are ${remainingFailures} requests that failed to upload. 
` + + `Please check the receipts file for details: ${receipts.receiptsFilepath}`, + ), + ); + } +} +/* eslint-enable no-param-reassign */ diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts b/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts new file mode 100644 index 00000000..8bda5a1e --- /dev/null +++ b/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts @@ -0,0 +1,143 @@ +/** + * Module: transform/buildPendingUpdates + * + * Pure transformation from parsed CSV rows + schema mappings into + * PreferenceUpdateItem payloads, ready for upload. + */ +import type { PreferenceUpdateItem } from '@transcend-io/privacy-types'; + +import type { + PreferenceTopic, + FormattedAttribute, + Purpose, +} from '../../../../../lib/graphql/index.js'; +import { + getPreferenceIdentifiersFromRow, + getPreferenceUpdatesFromRow, + getPreferenceMetadataFromRow, + NONE_PREFERENCE_MAP, + type ColumnIdentifierMap, + type ColumnMetadataMap, + type ColumnPurposeMap, + type PendingSafePreferenceUpdates, + type PendingWithConflictPreferenceUpdates, +} from '../../../../../lib/preference-management/index.js'; + +export interface BuildPendingParams { + /** Safe updates keyed by user/primaryKey */ + safe: PendingSafePreferenceUpdates; + /** Conflict updates keyed by user/primaryKey (value.row contains row data) */ + conflicts: PendingWithConflictPreferenceUpdates; + /** Only upload safe updates (ignore conflicts entirely) */ + skipConflictUpdates: boolean; + /** Name of the column to use as the preference timestamp (if available) */ + timestampColumn?: string; + /** CSV column -> purpose/preference mapping */ + columnToPurposeName: ColumnPurposeMap; + /** CSV column -> identifier mapping */ + columnToIdentifier: ColumnIdentifierMap; + /** CSV column -> metadata key mapping (optional) */ + columnToMetadata?: ColumnMetadataMap; + /** Full set of preference 
topics for resolving row → preference values */ + preferenceTopics: PreferenceTopic[]; + /** Full set of purposes for resolving slugs/trackingTypes */ + purposes: Purpose[]; + /** Partition to attribute to every record */ + partition: string; + /** Static attributes injected into workflow settings */ + workflowAttrs: FormattedAttribute[]; + /** If true, downstream should avoid user-visible notifications */ + isSilent: boolean; + /** If true, skip triggering workflows downstream */ + skipWorkflowTriggers: boolean; + /** If true, force trigger workflows even if preferences haven't changed */ + forceTriggerWorkflows: boolean; +} + +/** + * Convert parsed CSV rows into a map of PreferenceUpdateItem payloads. + * + * This function is *pure* (no IO, logging or state writes) and therefore easy to test. + * + * @param params - Transformation inputs + * @returns Map of primaryKey -> PreferenceUpdateItem + */ +export function buildPendingUpdates( + params: BuildPendingParams, +): Record { + const { + safe, + conflicts, + skipConflictUpdates, + timestampColumn, + columnToPurposeName, + columnToIdentifier, + columnToMetadata, + preferenceTopics, + purposes, + partition, + workflowAttrs, + isSilent, + skipWorkflowTriggers, + forceTriggerWorkflows, + } = params; + + // If conflicts are to be included, normalize the shape to match `safe` rows. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const merged: Record = skipConflictUpdates + ? { ...safe } + : { + ...safe, + ...Object.fromEntries(Object.entries(conflicts).map(([id, v]) => [id, v.row])), + }; + + const purposeSlugs = purposes.map((x) => x.trackingType); + const out: Record = {}; + + for (const [userId, row] of Object.entries(merged)) { + // Determine timestamp used for the store + const ts = + timestampColumn === NONE_PREFERENCE_MAP || !timestampColumn + ? 
new Date() + : new Date(row[timestampColumn]); + + // Resolve purposes/preferences from columns using schema mappings + topics + const updates = getPreferenceUpdatesFromRow({ + row, + columnToPurposeName, + preferenceTopics, + purposeSlugs, + }); + + // Resolve identifiers per row (email, phone, userId, etc.) + const identifiers = getPreferenceIdentifiersFromRow({ + row, + columnToIdentifier, + }); + + // Resolve metadata from mapped columns (if any) + const metadata = columnToMetadata + ? getPreferenceMetadataFromRow({ row, columnToMetadata }) + : undefined; + + out[userId] = { + identifiers, + partition, + timestamp: ts.toISOString(), + purposes: Object.entries(updates).map(([purpose, value]) => ({ + ...value, + purpose, + workflowSettings: { + attributes: workflowAttrs, + isSilent, + skipWorkflowTrigger: skipWorkflowTriggers, + forceTriggerWorkflow: forceTriggerWorkflows, + }, + })), + // Only include metadata if there are values + ...(metadata && metadata.length > 0 ? { metadata } : {}), + }; + } + + return out; +} diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/transform/index.ts b/packages/cli/src/commands/consent/upload-preferences/upload/transform/index.ts new file mode 100644 index 00000000..5f1f0911 --- /dev/null +++ b/packages/cli/src/commands/consent/upload-preferences/upload/transform/index.ts @@ -0,0 +1,2 @@ +export * from './buildPendingUpdates.js'; +export * from './transformCsv.js'; diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/transform/transformCsv.ts b/packages/cli/src/commands/consent/upload-preferences/upload/transform/transformCsv.ts new file mode 100644 index 00000000..aaa25cfa --- /dev/null +++ b/packages/cli/src/commands/consent/upload-preferences/upload/transform/transformCsv.ts @@ -0,0 +1,80 @@ +// FIXME +import colors from 'colors'; + +import { logger } from '../../../../../logger.js'; + +/** + * Add Transcend ID to preferences if email_id is present + * + * @param preferences - List 
of preferences + * @returns The updated preferences with Transcend ID added + */ +export function transformCsv(preferences: Record[]): Record[] { + // Add a Transcend ID to each preference if it doesn't already exist + const disallowedEmails = (process.env.EMAIL_LIST || '') + .split(',') + .map((email) => email.trim().toLowerCase()); + + const keys = Object.keys(preferences[0]); + const isUdp = + keys.includes('email_address') && + keys.includes('person_id') && + keys.includes('member_id') && + keys.includes('birth_dt'); + if (isUdp) { + logger.info( + colors.yellow('Detected UDP format. Transforming preferences to include Transcend ID.'), + ); + + return preferences.map((pref) => { + const email = (pref.email_address || '').toLowerCase().trim(); + const emailAddress = !email || disallowedEmails.includes(email) ? '' : pref.email_address; + const birthDate = new Date(pref.birth_dt); + if (!!pref.birth_dt || Number.isNaN(birthDate.getTime())) { + logger.warn(colors.yellow(`No birth date for record: ${pref.email_address}`)); + } + return { + ...pref, + Minor: + !pref.birth_dt || Number.isNaN(birthDate.getTime()) + ? '' + : Date.now() - birthDate.getTime() < 1000 * 60 * 60 * 24 * 365 * 18 + ? 'True' + : 'False', + email_address: emailAddress, + // preference email address over transcendID + transcendID: emailAddress + ? '' + : pref.person_id && pref.person_id !== '-2' + ? pref.person_id + : pref.member_id, + }; + }); + } + + const isAdobe = + keys.includes('hashedCostcoID') && keys.includes('address') && keys.includes('lastUpdatedDate'); + if (isAdobe) { + logger.info(colors.green('Pre-processing as adobe ')); + return preferences.map((pref) => { + if (!pref.lastUpdatedDate) { + logger.warn( + colors.yellow( + `Record missing lastUpdatedDate - setting to now() - ${JSON.stringify(pref)}`, + ), + ); + } + return { + ...pref, + lastUpdatedDate: pref.lastUpdatedDate + ?
pref.lastUpdatedDate + : new Date('08/24/2025').toISOString(), + }; + }); + } + + logger.info(colors.green('No special transformations applied.')); + + // FIXME skip the emails + return preferences; +} diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/types.ts b/packages/cli/src/commands/consent/upload-preferences/upload/types.ts new file mode 100644 index 00000000..cb1e1b1f --- /dev/null +++ b/packages/cli/src/commands/consent/upload-preferences/upload/types.ts @@ -0,0 +1,8 @@ +export interface PreferenceUploadProgress { + /** how many records just succeeded */ + successDelta: number; + /** cumulative successes in this file */ + successTotal: number; + /** total records that will be uploaded in this file */ + fileTotal: number; +} diff --git a/packages/cli/src/commands/consent/upload-preferences/worker.ts b/packages/cli/src/commands/consent/upload-preferences/worker.ts new file mode 100644 index 00000000..a5550d61 --- /dev/null +++ b/packages/cli/src/commands/consent/upload-preferences/worker.ts @@ -0,0 +1,194 @@ +import { mkdirSync, createWriteStream } from 'node:fs'; +import { join, dirname } from 'node:path'; + +import { + buildTranscendGraphQLClient, + createSombraGotInstance, +} from '../../../lib/graphql/index.js'; +import type { ToWorker } from '../../../lib/pooling/index.js'; +import { splitCsvToList } from '../../../lib/requests/index.js'; +import { logger } from '../../../logger.js'; +import { getFilePrefix } from './artifacts/index.js'; +import { makeReceiptsState } from './artifacts/receipts/receiptsState.js'; +import type { TaskCommonOpts } from './buildTaskOptions.js'; +import { makeSchemaState } from './schemaState.js'; +import { + interactivePreferenceUploaderFromPlan, + buildInteractiveUploadPreferencePlan, +} from './upload/index.js'; + +/** + * Run the child process for handling upload preferences. 
+ * This runs in a separate CPU if possible + */ +export async function runChild(): Promise { + // Get worker ID from environment or default to 0 + const workerId = Number(process.env.WORKER_ID || '0'); + + // Determine log file path from environment or default location + const logFile = process.env.WORKER_LOG || join(process.cwd(), `logs/worker-${workerId}.log`); + mkdirSync(dirname(logFile), { recursive: true }); + + // Create a writable stream for logging + const logStream = createWriteStream(logFile, { flags: 'a' }); + + // Helper function to write logs with timestamp and worker ID + const log = (...args: unknown[]): void => { + const line = `[w${workerId}] ${new Date().toISOString()} ${args + .map((a) => String(a)) + .join(' ')}\n`; + logStream.write(line); + }; + + // Log that the worker is ready and send a ready message to parent + logger.info(`[w${workerId}] ready pid=${process.pid}`); + process.send?.({ type: 'ready' }); + + // Listen for messages from the parent process + process.on( + 'message', + async ( + msg: ToWorker<{ + /** File path */ + filePath: string; + /** Options */ + options: TaskCommonOpts; + }>, + ) => { + if (!msg || typeof msg !== 'object') return; + + // Handle 'task' messages to process a file + if (msg.type === 'task') { + const { filePath, options } = msg.payload; + // Compute the path for receipts file + const receiptFilepath = join( + options.receiptsFolder, + `${getFilePrefix(filePath)}-receipts.json`, + ); + try { + // Ensure receipts directory exists + mkdirSync(dirname(receiptFilepath), { recursive: true }); + logger.info(`[w${workerId}] START ${filePath}`); + log(`START ${filePath}`); + + // Construct common state objects for the task + const receipts = await makeReceiptsState(receiptFilepath); + const schema = await makeSchemaState(options.schemaFile); + const client = buildTranscendGraphQLClient(options.transcendUrl, options.auth); + const sombra = await createSombraGotInstance( + options.transcendUrl, + options.auth, + 
options.sombraAuth, + ); + + // Derive identifierColumns and columnsToIgnore from config + const columnToIdentifier = schema.getColumnToIdentifier(); + const identifierColumns = Object.keys(columnToIdentifier); + const allowedIdentifierNames = [ + ...new Set(Object.values(columnToIdentifier).map((v) => v.name)), + ]; + const columnsToIgnore = schema.state.getValue('columnsToIgnore') ?? []; + + // Step 1: Build the upload plan (validation-only, non-interactive) + const plan = await buildInteractiveUploadPreferencePlan({ + sombra, + client, + file: filePath, + partition: options.partition, + receipts, + schema, + identifierDownloadLogInterval: options.uploadLogInterval * 10, + downloadIdentifierConcurrency: options.downloadIdentifierConcurrency, + skipExistingRecordCheck: options.skipExistingRecordCheck, + forceTriggerWorkflows: options.forceTriggerWorkflows, + allowedIdentifierNames, + maxRecordsToReceipt: options.maxRecordsToReceipt, + identifierColumns, + columnsToIgnore, + attributes: splitCsvToList(options.attributes), + nonInteractive: true, + // Report progress to parent process + onProgress: ({ successTotal, fileTotal }) => { + process.send?.({ + type: 'progress', + payload: { + filePath, + processed: successTotal, + total: fileTotal, + }, + }); + }, + }); + + // Step 2: Execute the upload using the plan + await interactivePreferenceUploaderFromPlan(plan, { + receipts, + sombra, + dryRun: options.dryRun, + isSilent: options.isSilent, + skipWorkflowTriggers: options.skipWorkflowTriggers, + skipConflictUpdates: options.skipConflictUpdates, + forceTriggerWorkflows: options.forceTriggerWorkflows, + uploadLogInterval: options.uploadLogInterval, + maxChunkSize: options.maxChunkSize, + uploadConcurrency: options.uploadConcurrency, + maxRecordsToReceipt: options.maxRecordsToReceipt, + // Report progress to parent process + onProgress: ({ successTotal, fileTotal }) => { + process.send?.({ + type: 'progress', + payload: { + filePath, + processed: successTotal, + total: 
fileTotal, + }, + }); + }, + }); + + // Log completion and send result to parent + logger.info(`[w${workerId}] DONE ${filePath}`); + log(`SUCCESS ${filePath}`); + + process.send?.({ + type: 'result', + payload: { ok: true, filePath, receiptFilepath }, + }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (err: any) { + // Handle errors, log them, and send failure result to parent + const e = err?.stack || err?.message || String(err); + logger.error(`[w${workerId}] ERROR ${filePath}: ${err?.message || err}\n\n${e}`); + log(`FAIL ${filePath}\n${e}`); + process.send?.({ + type: 'result', + payload: { ok: false, filePath, error: e, receiptFilepath }, + }); + } + } else if (msg.type === 'shutdown') { + // Handle shutdown message: log and exit gracefully + logger.info(`[w${workerId}] shutdown`); + log('Shutting down.'); + logStream.end(() => process.exit(0)); + } + }, + ); + + // Handle uncaught exceptions: log and exit + process.on('uncaughtException', (err) => { + logger.error(`[w${workerId}] uncaughtException: ${err?.stack || err}`); + log(`uncaughtException\n${err?.stack || err}`); + logStream.end(() => process.exit(1)); + }); + // Handle unhandled promise rejections: log and exit + process.on('unhandledRejection', (reason) => { + logger.error(`[w${workerId}] unhandledRejection: ${String(reason)}`); + log(`unhandledRejection\n${String(reason)}`); + logStream.end(() => process.exit(1)); + }); + + // Keep the process alive indefinitely + await new Promise(() => { + // Keep the process alive + }); +} diff --git a/packages/cli/src/lib/graphql/gqls/RequestDataSilo.ts b/packages/cli/src/lib/graphql/gqls/RequestDataSilo.ts index 17b953ed..80391975 100644 --- a/packages/cli/src/lib/graphql/gqls/RequestDataSilo.ts +++ b/packages/cli/src/lib/graphql/gqls/RequestDataSilo.ts @@ -20,6 +20,7 @@ export const REQUEST_DATA_SILOS = gql` ) { nodes { id + status } totalCount } diff --git a/packages/cli/src/lib/pooling/logRotation.ts 
b/packages/cli/src/lib/pooling/logRotation.ts index ffe4062d..f6a56b3e 100644 --- a/packages/cli/src/lib/pooling/logRotation.ts +++ b/packages/cli/src/lib/pooling/logRotation.ts @@ -207,6 +207,7 @@ export function initLogDir(rootDir: string): string { const logDir = join(rootDir, 'logs'); mkdirSync(logDir, { recursive: true }); + // FIXME const RESET_MODE = (process.env.RESET_LOGS as 'truncate' | 'delete') ?? 'truncate'; resetWorkerLogs(logDir, RESET_MODE); @@ -243,15 +244,17 @@ export type ExportStatusMap = { /** * Return export statuses * - * @param logDir - Log directory + * FIXME what is this for? + * + * @param receiptsFolder - Receipts directory * @returns Export map */ -export function buildExportStatus(logDir: string): ExportStatusMap { +export function buildExportStatus(receiptsFolder: string): ExportStatusMap { return { - error: { path: join(logDir, 'combined-errors.log') }, - warn: { path: join(logDir, 'combined-warns.log') }, - info: { path: join(logDir, 'combined-info.log') }, - all: { path: join(logDir, 'combined-all.log') }, - failuresCsv: { path: join(logDir, 'failing-updates.csv') }, + error: { path: join(receiptsFolder, 'combined-errors.log') }, + warn: { path: join(receiptsFolder, 'combined-warns.log') }, + info: { path: join(receiptsFolder, 'combined-info.log') }, + all: { path: join(receiptsFolder, 'combined-all.log') }, + failuresCsv: { path: join(receiptsFolder, 'failing-updates.csv') }, }; } diff --git a/packages/cli/src/lib/preference-management/codecs.ts b/packages/cli/src/lib/preference-management/codecs.ts index 2a61a491..955457c3 100644 --- a/packages/cli/src/lib/preference-management/codecs.ts +++ b/packages/cli/src/lib/preference-management/codecs.ts @@ -87,47 +87,30 @@ export const ColumnMetadataMap = t.record(t.string, MetadataMapping); /** Override type */ export type ColumnMetadataMap = t.TypeOf; -export const FileMetadataState = t.intersection([ +export const FileFormatState = t.intersection([ t.type({ /** * Definition of how to 
map each column in the CSV to * the relevant purpose and preference definitions in transcend */ - columnToPurposeName: t.record(t.string, PurposeRowMapping), + columnToPurposeName: ColumnPurposeMap, /** Last time the file was last parsed at */ lastFetchedAt: t.string, - /** - * Mapping of userId to the rows in the file that need to be uploaded - * These uploads are overwriting non-existent preferences and are safe - */ - pendingSafeUpdates: t.record(t.string, t.record(t.string, t.string)), - /** - * Mapping of userId to the rows in the file that need to be uploaded - * these records have conflicts with existing consent preferences - */ - pendingConflictUpdates: t.record( - t.string, - t.type({ - record: PreferenceQueryResponseItem, - row: t.record(t.string, t.string), - }), - ), - /** - * Mapping of userId to the rows in the file that can be skipped because - * their preferences are already in the store - */ - skippedUpdates: t.record(t.string, t.record(t.string, t.string)), + /** The column name that maps to the identifier */ + columnToIdentifier: ColumnIdentifierMap, }), t.partial({ - /** Determine which column name in file maps to consent record identifier to upload on */ - identifierColumn: t.string, /** Determine which column name in file maps to the timestamp */ - timestampColum: t.string, + timestampColumn: t.string, + /** Mapping of CSV column names to metadata keys */ + columnToMetadata: ColumnMetadataMap, + /** CSV columns that should be ignored during upload */ + columnsToIgnore: t.array(t.string), }), ]); /** Override type */ -export type FileMetadataState = t.TypeOf; +export type FileFormatState = t.TypeOf; /** * This is the type of the receipts that are stored in the file @@ -219,36 +202,25 @@ export const SkippedPreferenceUpdates = t.record(t.string, t.record(t.string, t. 
/** Override type */ export type SkippedPreferenceUpdates = t.TypeOf; -/** Persist this data between runs of the script */ -export const PreferenceState = t.type({ - /** - * Store a cache of previous files read in - */ - fileMetadata: t.record(t.string, FileMetadataState), - /** - * The set of successful uploads to Transcend - * Mapping from userId to the upload metadata - */ - failingUpdates: t.record( - t.string, - t.type({ - /** Time upload ran at */ - uploadedAt: t.string, - /** Attempts to upload that resulted in an error */ - error: t.string, - /** The update body */ - update: PreferenceUpdateItem, - }), - ), - /** - * The set of pending uploads to Transcend - * Mapping from userId to the upload metadata - */ - pendingUpdates: t.record(t.string, PreferenceUpdateItem), +export const RequestUploadReceipts = t.type({ + /** Last time the file was last parsed at */ + lastFetchedAt: t.string, + /** Safe updates (no conflict with existing preferences) keyed by primaryKey */ + pendingSafeUpdates: PendingSafePreferenceUpdates, + /** Conflict updates (existing preferences differ) keyed by primaryKey */ + pendingConflictUpdates: PendingWithConflictPreferenceUpdates, + /** Skipped rows (already in store or duplicates) keyed by primaryKey */ + skippedUpdates: SkippedPreferenceUpdates, + /** Failed uploads keyed by primaryKey */ + failingUpdates: FailingPreferenceUpdates, + /** Pending uploads at time of last cache write; shrinks as processed */ + pendingUpdates: PreferenceUpdateMap, + /** Successfully processed uploads keyed by primaryKey */ + successfulUpdates: PreferenceUpdateMap, }); /** Override type */ -export type PreferenceState = t.TypeOf; +export type RequestUploadReceipts = t.TypeOf; export const DeletePreferenceRecordsInput = t.type({ /** Array of consent preference records to delete */ diff --git a/packages/cli/src/lib/preference-management/getPreferencesForIdentifiers.ts b/packages/cli/src/lib/preference-management/getPreferencesForIdentifiers.ts index 
71f0e15b..05c54d09 100644 --- a/packages/cli/src/lib/preference-management/getPreferencesForIdentifiers.ts +++ b/packages/cli/src/lib/preference-management/getPreferencesForIdentifiers.ts @@ -1,12 +1,13 @@ import { PreferenceQueryResponseItem } from '@transcend-io/privacy-types'; import { decodeCodec } from '@transcend-io/type-utils'; -import cliProgress from 'cli-progress'; import colors from 'colors'; import type { Got } from 'got'; import { chunk } from 'lodash-es'; +import type { PreferenceUploadProgress } from '../../commands/consent/upload-preferences/upload/index.js'; import { logger } from '../../logger.js'; import { map } from '../bluebird.js'; +import { extractErrorMessage, splitInHalf } from '../helpers/index.js'; import { ConsentPreferenceResponse } from './types.js'; import { withPreferenceRetry } from './withPreferenceRetry.js'; @@ -22,6 +23,8 @@ export async function getPreferencesForIdentifiers( { identifiers, partitionKey, + onProgress, + logInterval = 10000, skipLogging = false, concurrency = 40, }: { @@ -29,13 +32,19 @@ export async function getPreferencesForIdentifiers( identifiers: { /** The value of the identifier */ value: string; + /** The name of the identifier */ + name: string; }[]; /** The partition key to look up */ partitionKey: string; /** Whether to skip logging */ skipLogging?: boolean; - /** Concurrency for requests (default 40) */ + /** The interval to log upload progress */ + logInterval?: number; + /** Concurrency for fetching identifiers */ concurrency?: number; + /** on progress callback */ + onProgress?: (info: PreferenceUploadProgress) => void; }, ): Promise { const results: PreferenceQueryResponseItem[] = []; @@ -43,48 +52,144 @@ export async function getPreferencesForIdentifiers( // create a new progress bar instance and use shades_classic theme const t0 = new Date().getTime(); - const progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic); - if (!skipLogging) { - 
progressBar.start(identifiers.length, 0); - } let total = 0; - await map( - groupedIdentifiers, - async (group) => { - const rawResult = await withPreferenceRetry( - 'Preference Query', - () => - sombra - .post(`v1/preferences/${partitionKey}/query`, { - json: { - filter: { identifiers: group }, - limit: group.length, - }, - }) - .json(), - { - onRetry: (attempt, _err, msg) => { - logger.warn( - colors.yellow( - `[RETRY] group size=${group.length} partition=${partitionKey} attempt=${attempt}: ${msg}`, - ), - ); - }, - }, + onProgress?.({ + successDelta: 0, + successTotal: 0, + fileTotal: identifiers.length, // FIXME should be record not identifier count + }); + + /** + * Progress logger respecting `logInterval` + * + * @param delta - delta updated + */ + const maybeLogProgress = (delta: number): void => { + onProgress?.({ + successDelta: delta, + successTotal: total, + fileTotal: identifiers.length, + }); + + if (skipLogging) return; + const shouldLog = + total % logInterval === 0 || + Math.floor((total - identifiers.length) / logInterval) < Math.floor(total / logInterval); + if (shouldLog) { + logger.info( + colors.green( + `Fetched ${total}/${identifiers.length} user preferences from partition ${partitionKey}`, + ), ); + } + }; + + /** + * Attempt a single POST for a given group with transient retries. + * Returns decoded nodes on success. + * Throws an error on terminal failure. + * If the error contains "did not pass validation", it throws that error up + * so the caller can choose to split. 
+ * + * @param group - The group of identifiers to fetch + * @returns The decoded nodes from the response + */ + const postGroupWithRetries = async ( + group: { + /** Value of the identifier */ + value: string; + /** Name of the identifier */ + name: string; + }[], + ): Promise => { + const rawResult = await withPreferenceRetry( + 'Preference Query', + () => + sombra + .post(`v1/preferences/${partitionKey}/query`, { + json: { + filter: { identifiers: group }, + }, + }) + .json(), + { + onRetry: (attempt, _err, msg) => { + logger.warn( + colors.yellow( + `[RETRY v1/preferences/${partitionKey}/query] ` + + `group size=${group.length} partition=${partitionKey} attempt=${attempt}: ${msg}`, + ), + ); + }, + }, + ); - const result = decodeCodec(ConsentPreferenceResponse, rawResult); - results.push(...result.nodes); + const result = decodeCodec(ConsentPreferenceResponse, rawResult); + return result.nodes; + }; + + /** + * Recursively process a group: + * - Try to fetch in one go. + * - If it fails with "did not pass validation", split into halves and recurse. + * - If the group becomes a singleton and still fails validation, skip it. + * In all terminal paths (success or skip), increment `total` by the + * number of identifiers accounted for and log progress. 
+ * + * @param group - The group of identifiers to process + */ + const processGroup = async ( + group: { + /** Value of the identifier */ + value: string; + /** Name of the identifier */ + name: string; + }[], + ): Promise => { + try { + const nodes = await postGroupWithRetries(group); + results.push(...nodes); total += group.length; - progressBar.update(total); - }, - { - concurrency, + maybeLogProgress(group.length); + } catch (err) { + const msg = extractErrorMessage(err); + + if (/did not pass validation/i.test(msg)) { + // If single, skip and count it + if (group.length === 1) { + const only = group[0]; + logger.warn(colors.yellow(`Skipping identifier "${only.value}" (${only.name}): ${msg}`)); + total += 1; + maybeLogProgress(1); + return; + } + + // Otherwise, split and recurse + const [left, right] = splitInHalf(group); + logger.warn( + colors.yellow( + `Group of ${group.length} did not pass validation. Splitting into ${left.length} and ${right.length}.`, + ), + ); + await processGroup(left); + await processGroup(right); + return; + } + + // Non-validation terminal error: rethrow + throw err; + } + }; + + await map( + groupedIdentifiers, + async (group) => { + await processGroup(group); }, + { concurrency }, ); - progressBar.stop(); const t1 = new Date().getTime(); const totalTime = t1 - t0; diff --git a/packages/cli/src/lib/preference-management/index.ts b/packages/cli/src/lib/preference-management/index.ts index c43a0559..c0eecc03 100644 --- a/packages/cli/src/lib/preference-management/index.ts +++ b/packages/cli/src/lib/preference-management/index.ts @@ -1,4 +1,3 @@ -export * from './uploadPreferenceManagementPreferencesInteractive.js'; export * from './codecs.js'; export * from './getPreferencesForIdentifiers.js'; export * from './parsePreferenceManagementCsv.js'; @@ -6,7 +5,7 @@ export * from './getPreferenceUpdatesFromRow.js'; export * from './getPreferenceMetadataFromRow.js'; export * from './parsePreferenceManagementCsv.js'; export * from 
'./parsePreferenceIdentifiersFromCsv.js'; -export * from './parsePreferenceTimestampsFromCsv.js'; +export * from './parsePreferenceFileFormatFromCsv.js'; export * from './parsePreferenceAndPurposeValuesFromCsv.js'; export * from './checkIfPendingPreferenceUpdatesAreNoOp.js'; export * from './checkIfPendingPreferenceUpdatesCauseConflict.js'; diff --git a/packages/cli/src/lib/preference-management/parsePreferenceAndPurposeValuesFromCsv.ts b/packages/cli/src/lib/preference-management/parsePreferenceAndPurposeValuesFromCsv.ts index 705851b0..3215e20a 100644 --- a/packages/cli/src/lib/preference-management/parsePreferenceAndPurposeValuesFromCsv.ts +++ b/packages/cli/src/lib/preference-management/parsePreferenceAndPurposeValuesFromCsv.ts @@ -1,3 +1,4 @@ +import type { PersistedState } from '@transcend-io/persisted-state'; import { PreferenceTopicType } from '@transcend-io/privacy-types'; import colors from 'colors'; import inquirer from 'inquirer'; @@ -7,25 +8,60 @@ import { logger } from '../../logger.js'; import { mapSeries } from '../bluebird.js'; import { PreferenceTopic } from '../graphql/index.js'; import { splitCsvToList } from '../requests/index.js'; -import { FileMetadataState } from './codecs.js'; +import { FileFormatState } from './codecs.js'; -/* eslint-disable no-param-reassign */ +/** Values that clearly mean "no preference recorded" and should map to null. */ +const NULL_VALUES = new Set(['', 'undefined', 'null', 'none', 'n/a', 'na']); + +const FALSY_VALUES = new Set([ + 'false', + '0', + 'no', + 'n', + 'off', + 'opt-out', + 'optout', + 'opt_out', + 'unsubscribed', +]); + +/** + * Check whether a raw CSV value represents "no data" and should map to null. + * + * @param value - raw CSV cell value + * @returns true when the value should be treated as null (no preference) + */ +function looksNull(value: string): boolean { + return NULL_VALUES.has(value.trim().toLowerCase()); +} + +/** + * Infer a sensible Y/n default for a purpose/preference value prompt. 
+ * + * @param value - raw CSV cell value + * @returns true when the value looks like "opted-in" + */ +function looksOptedIn(value: string): boolean { + return !FALSY_VALUES.has(value.trim().toLowerCase()) && !looksNull(value); +} /** * Parse out the purpose.enabled and preference values from a CSV file * * @param preferences - List of preferences - * @param currentState - The current file metadata state for parsing this list + * @param schemaState - The schema state to use for parsing the file * @param options - Options * @returns The updated file metadata state */ export async function parsePreferenceAndPurposeValuesFromCsv( preferences: Record[], - currentState: FileMetadataState, + schemaState: PersistedState, { purposeSlugs, preferenceTopics, forceTriggerWorkflows, + columnsToIgnore, + nonInteractive = false, }: { /** The purpose slugs that are allowed to be updated */ purposeSlugs: string[]; @@ -33,19 +69,26 @@ export async function parsePreferenceAndPurposeValuesFromCsv( preferenceTopics: PreferenceTopic[]; /** Force workflow triggers */ forceTriggerWorkflows: boolean; + /** Columns to ignore in the CSV file */ + columnsToIgnore: string[]; + /** When true, throw instead of prompting (for worker processes) */ + nonInteractive?: boolean; }, -): Promise { +): Promise> { // Determine columns to map const columnNames = uniq(preferences.map((x) => Object.keys(x)).flat()); // Determine the columns that could potentially be used for identifier + const timestampCol = schemaState.getValue('timestampColumn'); const otherColumns = difference(columnNames, [ - ...(currentState.identifierColumn ? [currentState.identifierColumn] : []), - ...(currentState.timestampColum ? [currentState.timestampColum] : []), + ...Object.keys(schemaState.getValue('columnToIdentifier')), + ...(timestampCol ? [timestampCol] : []), + ...columnsToIgnore, + ...Object.keys(schemaState.getValue('columnToMetadata') ?? 
{}), ]); if (otherColumns.length === 0) { if (forceTriggerWorkflows) { - return currentState; + return schemaState; } throw new Error('No other columns to process'); } @@ -58,16 +101,24 @@ export async function parsePreferenceAndPurposeValuesFromCsv( // Ensure all columns are accounted for await mapSeries(otherColumns, async (col) => { - // Determine the unique values to map in this column - const uniqueValues = uniq(preferences.map((x) => x[col])); + // Determine the unique values to map in this column (including empty strings) + const uniqueValues = uniq(preferences.map((x) => x[col] ?? '')); // Map the column to a purpose - let purposeMapping = currentState.columnToPurposeName[col]; + const currentPurposeMapping = schemaState.getValue('columnToPurposeName'); + let purposeMapping = currentPurposeMapping[col]; if (purposeMapping) { logger.info( colors.magenta(`Column "${col}" is associated with purpose "${purposeMapping.purpose}"`), ); } else { + if (nonInteractive) { + throw new Error( + `Column "${col}" has no purpose mapping in the config. ` + + "Run 'transcend consent configure-preference-upload' to update the config.", + ); + } + const { purposeName } = await inquirer.prompt<{ /** purpose name */ purposeName: string; @@ -98,20 +149,44 @@ export async function parsePreferenceAndPurposeValuesFromCsv( ); return; } + + if (looksNull(value)) { + logger.info( + colors.magenta( + `Value "${value || '(empty)'}" for column "${col}" → null (no preference)`, + ), + ); + purposeMapping.valueMapping[value] = null as unknown as boolean; + return; + } + + if (nonInteractive) { + throw new Error( + `Value "${value}" for column "${col}" has no mapping in the config. 
` + + "Run 'transcend consent configure-preference-upload' to update the config.", + ); + } + // if preference is null, this column is just for the purpose if (purposeMapping.preference === null) { const { purposeValue } = await inquirer.prompt<{ - /** purpose value */ - purposeValue: boolean; + /** The mapped purpose value chosen by the user */ + purposeValue: string; }>([ { name: 'purposeValue', - message: `Choose the purpose value for value "${value}" associated with purpose "${purposeMapping.purpose}"`, - type: 'confirm', - default: value !== 'false', + message: `Map value "${value}" for purpose "${purposeMapping.purpose}"`, + type: 'list', + choices: [ + { name: 'true (opted in)', value: 'true' }, + { name: 'false (opted out)', value: 'false' }, + { name: 'null (skip / no preference)', value: 'null' }, + ], + default: looksOptedIn(value) ? 'true' : 'false', }, ]); - purposeMapping.valueMapping[value] = purposeValue; + purposeMapping.valueMapping[value] = + purposeValue === 'null' ? 
(null as unknown as boolean) : purposeValue === 'true'; } // if preference is not null, this column is for a specific preference @@ -125,37 +200,47 @@ export async function parsePreferenceAndPurposeValuesFromCsv( if (preferenceTopic.type === PreferenceTopicType.Boolean) { const { preferenceValue } = await inquirer.prompt<{ - /** purpose value */ - preferenceValue: boolean; + /** The mapped boolean preference value chosen by the user */ + preferenceValue: string; }>([ { name: 'preferenceValue', - message: - // eslint-disable-next-line max-len - `Choose the preference value for "${preferenceTopic.slug}" value "${value}" associated with purpose "${purposeMapping.purpose}"`, - type: 'confirm', - default: value !== 'false', + message: `Map value "${value}" for preference "${preferenceTopic.slug}" (${purposeMapping.purpose})`, + type: 'list', + choices: [ + { name: 'true (opted in)', value: 'true' }, + { name: 'false (opted out)', value: 'false' }, + { name: 'null (skip / no preference)', value: 'null' }, + ], + default: looksOptedIn(value) ? 'true' : 'false', }, ]); - purposeMapping.valueMapping[value] = preferenceValue; + purposeMapping.valueMapping[value] = + preferenceValue === 'null' ? 
(null as unknown as boolean) : preferenceValue === 'true'; return; } if (preferenceTopic.type === PreferenceTopicType.Select) { + const choices = [ + ...preferenceOptions.map((o) => ({ name: o, value: o })), + { name: '(null — skip / no preference)', value: '__null__' }, + ]; const { preferenceValue } = await inquirer.prompt<{ - /** purpose value */ - preferenceValue: boolean; + /** The mapped select preference value chosen by the user */ + preferenceValue: string; }>([ { name: 'preferenceValue', - // eslint-disable-next-line max-len - message: `Choose the preference value for "${preferenceTopic.slug}" value "${value}" associated with purpose "${purposeMapping.purpose}"`, + message: `Map value "${value}" for preference "${preferenceTopic.slug}" (${purposeMapping.purpose})`, type: 'list', - choices: preferenceOptions, + choices, default: preferenceOptions.find((x) => x === value), }, ]); - purposeMapping.valueMapping[value] = preferenceValue; + purposeMapping.valueMapping[value] = + preferenceValue === '__null__' + ? 
(null as unknown as boolean) + : (preferenceValue as unknown as boolean); return; } @@ -167,20 +252,29 @@ export async function parsePreferenceAndPurposeValuesFromCsv( if (purposeMapping.valueMapping[parsedValue] !== undefined) { return; } + const msChoices = [ + ...preferenceOptions.map((o) => ({ name: o, value: o })), + { + name: '(null — skip / no preference)', + value: '__null__', + }, + ]; const { preferenceValue } = await inquirer.prompt<{ - /** purpose value */ - preferenceValue: boolean; + /** The mapped multi-select preference value chosen by the user */ + preferenceValue: string; }>([ { name: 'preferenceValue', - // eslint-disable-next-line max-len - message: `Choose the preference value for "${preferenceTopic.slug}" value "${parsedValue}" associated with purpose "${purposeMapping.purpose}"`, + message: `Map token "${parsedValue}" for preference "${preferenceTopic.slug}" (${purposeMapping.purpose})`, type: 'list', - choices: preferenceOptions, + choices: msChoices, default: preferenceOptions.find((x) => x === parsedValue), }, ]); - purposeMapping.valueMapping[parsedValue] = preferenceValue; + purposeMapping.valueMapping[parsedValue] = + preferenceValue === '__null__' + ? 
(null as unknown as boolean) + : (preferenceValue as unknown as boolean); }); return; } @@ -188,10 +282,9 @@ export async function parsePreferenceAndPurposeValuesFromCsv( throw new Error(`Unknown preference topic type: ${preferenceTopic.type}`); } }); - - currentState.columnToPurposeName[col] = purposeMapping; + currentPurposeMapping[col] = purposeMapping; + schemaState.setValue(currentPurposeMapping, 'columnToPurposeName'); }); - return currentState; + return schemaState; } -/* eslint-enable no-param-reassign */ diff --git a/packages/cli/src/lib/preference-management/parsePreferenceTimestampsFromCsv.ts b/packages/cli/src/lib/preference-management/parsePreferenceFileFormatFromCsv.ts similarity index 54% rename from packages/cli/src/lib/preference-management/parsePreferenceTimestampsFromCsv.ts rename to packages/cli/src/lib/preference-management/parsePreferenceFileFormatFromCsv.ts index 01cbb238..ea439596 100644 --- a/packages/cli/src/lib/preference-management/parsePreferenceTimestampsFromCsv.ts +++ b/packages/cli/src/lib/preference-management/parsePreferenceFileFormatFromCsv.ts @@ -1,16 +1,15 @@ +import type { PersistedState } from '@transcend-io/persisted-state'; import colors from 'colors'; import inquirer from 'inquirer'; import { uniq, difference } from 'lodash-es'; import { logger } from '../../logger.js'; -import { FileMetadataState } from './codecs.js'; +import { FileFormatState } from './codecs.js'; export const NONE_PREFERENCE_MAP = '[NONE]'; -/* eslint-disable no-param-reassign */ - /** - * Parse timestamps from a CSV list of preferences + * Parse timestamps and other file format mapping from a CSV list of preferences * * When timestamp is requested, this script * ensures that all rows have a valid timestamp. 
@@ -19,23 +18,36 @@ export const NONE_PREFERENCE_MAP = '[NONE]'; * * @param preferences - List of preferences * @param currentState - The current file metadata state for parsing this list + * @param options - Options * @returns The updated file metadata state */ -export async function parsePreferenceTimestampsFromCsv( +export async function parsePreferenceFileFormatFromCsv( preferences: Record[], - currentState: FileMetadataState, -): Promise { + currentState: PersistedState, + { + nonInteractive = false, + }: { + /** When true, throw instead of prompting */ nonInteractive?: boolean; + } = {}, +): Promise> { // Determine columns to map const columnNames = uniq(preferences.map((x) => Object.keys(x)).flat()); // Determine the columns that could potentially be used for timestamp const remainingColumnsForTimestamp = difference(columnNames, [ - ...(currentState.identifierColumn ? [currentState.identifierColumn] : []), - ...Object.keys(currentState.columnToPurposeName), + ...Object.keys(currentState.getValue('columnToIdentifier')), + ...Object.keys(currentState.getValue('columnToPurposeName')), ]); // Determine the timestamp column to work off of - if (!currentState.timestampColum) { + if (!currentState.getValue('timestampColumn')) { + if (nonInteractive) { + throw new Error( + 'No timestamp column configured. 
' + + "Run 'transcend consent configure-preference-upload' to set it.", + ); + } + const { timestampName } = await inquirer.prompt<{ /** timestamp name */ timestampName: string; @@ -51,29 +63,31 @@ export async function parsePreferenceTimestampsFromCsv( choices: [...remainingColumnsForTimestamp, NONE_PREFERENCE_MAP], }, ]); - currentState.timestampColum = timestampName; + + currentState.setValue(timestampName, 'timestampColumn'); } - logger.info(colors.magenta(`Using timestamp column "${currentState.timestampColum}"`)); + logger.info( + colors.magenta(`Using timestamp column "${currentState.getValue('timestampColumn')}"`), + ); // Validate that all rows have valid timestamp - if (currentState.timestampColum !== NONE_PREFERENCE_MAP) { + if (currentState.getValue('timestampColumn') !== NONE_PREFERENCE_MAP) { const timestampColumnsMissing = preferences - .map((pref, ind) => (pref[currentState.timestampColum!] ? null : [ind])) + .map((pref, ind) => (pref[currentState.getValue('timestampColumn')!] ? 
null : [ind])) .filter((x): x is number[] => !!x) .flat(); if (timestampColumnsMissing.length > 0) { throw new Error( - `The timestamp column "${ - currentState.timestampColum - }" is missing a value for the following rows: ${timestampColumnsMissing.join('\n')}`, + `The timestamp column "${currentState.getValue( + 'timestampColumn', + )}" is missing a value for the following rows: ${timestampColumnsMissing.join('\n')}`, ); } logger.info( colors.magenta( - `The timestamp column "${currentState.timestampColum}" is present for all row`, + `The timestamp column "${currentState.getValue('timestampColumn')}" is present for all row`, ), ); } return currentState; } -/* eslint-enable no-param-reassign */ diff --git a/packages/cli/src/lib/preference-management/parsePreferenceIdentifiersFromCsv.ts b/packages/cli/src/lib/preference-management/parsePreferenceIdentifiersFromCsv.ts index 1bfc7ed4..c1ed47cb 100644 --- a/packages/cli/src/lib/preference-management/parsePreferenceIdentifiersFromCsv.ts +++ b/packages/cli/src/lib/preference-management/parsePreferenceIdentifiersFromCsv.ts @@ -1,10 +1,17 @@ +import type { PersistedState } from '@transcend-io/persisted-state'; +import type { PreferenceStoreIdentifier } from '@transcend-io/privacy-types'; +import Bluebird from 'bluebird'; import colors from 'colors'; import inquirer from 'inquirer'; -import { uniq, groupBy, difference } from 'lodash-es'; +// groupBy +import { uniq, keyBy } from 'lodash-es'; import { logger } from '../../logger.js'; +import type { Identifier } from '../graphql/index.js'; import { inquirerConfirmBoolean } from '../helpers/index.js'; -import { FileMetadataState } from './codecs.js'; +import type { FileFormatState, IdentifierMetadataForPreference } from './codecs.js'; + +const { mapSeries } = Bluebird; /* eslint-disable no-param-reassign */ @@ -15,62 +22,129 @@ import { FileMetadataState } from './codecs.js'; * and that all identifiers are unique. 
* * @param preferences - List of preferences - * @param currentState - The current file metadata state for parsing this list + * @param options - Options * @returns The updated file metadata state */ export async function parsePreferenceIdentifiersFromCsv( preferences: Record[], - currentState: FileMetadataState, + { + schemaState, + orgIdentifiers, + allowedIdentifierNames, + identifierColumns, + nonInteractive = false, + }: { + /** The current state of the schema metadata */ + schemaState: PersistedState; + /** The list of identifiers configured for the org */ + orgIdentifiers: Identifier[]; + /** The list of identifier names that are allowed for this upload */ + allowedIdentifierNames: string[]; + /** The columns in the CSV that should be used as identifiers */ + identifierColumns: string[]; + /** When true, throw instead of prompting (for worker processes) */ + nonInteractive?: boolean; + }, ): Promise<{ /** The updated state */ - currentState: FileMetadataState; + schemaState: PersistedState; /** The updated preferences */ preferences: Record[]; }> { + const columnNames = uniq(preferences.map((x) => Object.keys(x)).flat()).filter((col) => + identifierColumns.includes(col), + ); // Determine columns to map - const columnNames = uniq(preferences.map((x) => Object.keys(x)).flat()); + const orgIdentifiersByName = keyBy(orgIdentifiers, 'name'); + const filteredOrgIdentifiers = allowedIdentifierNames + .map((name) => orgIdentifiersByName[name]) + .filter(Boolean); + if (filteredOrgIdentifiers.length !== allowedIdentifierNames.length) { + const missingIdentifiers = allowedIdentifierNames.filter((name) => !orgIdentifiersByName[name]); + throw new Error(`No identifier configuration found for "${missingIdentifiers.join('","')}"`); + } + if (columnNames.length !== identifierColumns.length) { + const missingColumns = identifierColumns.filter((col) => !columnNames.includes(col)); + throw new Error( + `The following identifier columns are missing from the CSV: 
"${missingColumns.join('","')}"`, + ); + } + + if ( + filteredOrgIdentifiers.filter((identifier) => identifier.isUniqueOnPreferenceStore).length === 0 + ) { + throw new Error( + 'No unique identifier was provided. Please ensure that at least one ' + + 'of the allowed identifiers is configured as unique on the preference store.', + ); + } + + // Determine the columns that could potentially be used for identifiers + const currentColumnToIdentifier = schemaState.getValue('columnToIdentifier'); + await mapSeries(identifierColumns, async (col) => { + // Map the column to an identifier + const identifierMapping = currentColumnToIdentifier[col]; + if (identifierMapping) { + logger.info( + colors.magenta(`Column "${col}" is associated with identifier "${identifierMapping.name}"`), + ); + return; + } - // Determine the columns that could potentially be used for identifier - const remainingColumnsForIdentifier = difference(columnNames, [ - ...(currentState.identifierColumn ? [currentState.identifierColumn] : []), - ...Object.keys(currentState.columnToPurposeName), - ]); + if (nonInteractive) { + throw new Error( + `Column "${col}" has no identifier mapping in the config. 
` + "Run 'transcend consent configure-preference-upload' to update the config.", + ); + } - // Determine the identifier column to work off of - if (!currentState.identifierColumn) { + // If the column is not mapped, ask the user to map it const { identifierName } = await inquirer.prompt<{ /** Identifier name */ identifierName: string; }>([ { name: 'identifierName', - message: - 'Choose the column that will be used as the identifier to upload consent preferences by', + message: `Choose the identifier name for column "${col}"`, type: 'list', - default: - remainingColumnsForIdentifier.find((col) => col.toLowerCase().includes('email')) || - remainingColumnsForIdentifier[0], - choices: remainingColumnsForIdentifier, + // Default to the first allowed identifier name + default: allowedIdentifierNames.find((x) => x.startsWith(col)), + choices: allowedIdentifierNames, }, ]); - currentState.identifierColumn = identifierName; - } - logger.info(colors.magenta(`Using identifier column "${currentState.identifierColumn}"`)); + currentColumnToIdentifier[col] = { + name: identifierName, + isUniqueOnPreferenceStore: orgIdentifiersByName[identifierName].isUniqueOnPreferenceStore, + }; + }); + schemaState.setValue(currentColumnToIdentifier, 'columnToIdentifier'); - // Validate that the identifier column is present for all rows and unique - const identifierColumnsMissing = preferences - .map((pref, ind) => (pref[currentState.identifierColumn!] ? null : [ind])) + const uniqueIdentifierColumns = Object.entries(currentColumnToIdentifier) + .filter(([, identifierMapping]) => identifierMapping.isUniqueOnPreferenceStore) + .map(([col]) => col); + + // Validate that at least 1 unique identifier column is present + const uniqueIdentifierMissingIndexes = preferences + .map((pref, ind) => (uniqueIdentifierColumns.some((col) => !!pref[col]) ?
null : [ind])) .filter((x): x is number[] => !!x) .flat(); - if (identifierColumnsMissing.length > 0) { - const msg = `The identifier column "${ - currentState.identifierColumn - }" is missing a value for the following rows: ${identifierColumnsMissing.join(', ')}`; + + if (uniqueIdentifierMissingIndexes.length > 0) { + const msg = ` + The following rows ${uniqueIdentifierMissingIndexes.join( + ', ', + )} do not have any unique identifier values for the columns "${uniqueIdentifierColumns.join( + '", "', + )}".`; logger.warn(colors.yellow(msg)); + if (nonInteractive) { + throw new Error(msg); + } + // Ask user if they would like to skip rows missing an identifier const skip = await inquirerConfirmBoolean({ - message: 'Would you like to skip rows missing an identifier?', + message: 'Would you like to skip rows missing unique identifiers?', }); if (!skip) { throw new Error(msg); @@ -78,49 +152,89 @@ export async function parsePreferenceIdentifiersFromCsv( // Filter out rows missing an identifier const previous = preferences.length; - preferences = preferences.filter((pref) => pref[currentState.identifierColumn!]); + preferences = preferences.filter( + (pref, index) => !uniqueIdentifierMissingIndexes.includes(index), + ); logger.info( - colors.yellow(`Skipped ${previous - preferences.length} rows missing an identifier`), + colors.yellow(`Skipped ${previous - preferences.length} rows missing unique identifiers`), ); } logger.info( colors.magenta( - `The identifier column "${currentState.identifierColumn}" is present for all rows`, + `At least one unique identifier column is present for all ${preferences.length} rows.`, ), ); - // Validate that all identifiers are unique - const rowsByUserId = groupBy(preferences, currentState.identifierColumn); - const duplicateIdentifiers = Object.entries(rowsByUserId).filter(([, rows]) => rows.length > 1); - if (duplicateIdentifiers.length > 0) { - const msg = `The identifier column "${ - currentState.identifierColumn - }" has 
duplicate values for the following rows: ${duplicateIdentifiers - .slice(0, 10) - .map(([userId, rows]) => `${userId} (${rows.length})`) - .join('\n')}`; - logger.warn(colors.yellow(msg)); + return { schemaState, preferences }; +} +/* eslint-enable no-param-reassign */ - // Ask user if they would like to take the most recent update - // for each duplicate identifier - const skip = await inquirerConfirmBoolean({ - message: 'Would you like to automatically take the latest update?', - }); - if (!skip) { - throw new Error(msg); - } - preferences = Object.entries(rowsByUserId) - .map(([, rows]) => { - const sorted = rows.sort( - (a, b) => - new Date(b[currentState.timestampColum!]).getTime() - - new Date(a[currentState.timestampColum!]).getTime(), - ); - return sorted[0]; - }) - .filter((x) => x); - } +/** + * Helper function to get the identifiers payload from a row + * + * @param options - Options + * @param options.row - The current row from CSV file + * @param options.columnToIdentifier - The column to identifier mapping metadata + * @returns The updated preferences with identifiers payload + */ +export function getPreferenceIdentifiersFromRow({ + row, + columnToIdentifier, +}: { + /** The current row from CSV file */ + row: Record; + /** The current file metadata state */ + columnToIdentifier: FileFormatState['columnToIdentifier']; +}): PreferenceStoreIdentifier[] { + const identifiers = Object.entries(columnToIdentifier) + .filter(([col]) => !!row[col]) + .map(([col, identifierMapping]) => ({ + name: identifierMapping.name, + value: row[col], + })); + // put email first if it exists + // TODO: https://linear.app/transcend/issue/PIK-285/set-precedence-of-unique-identifiers - remove email logic + return identifiers.sort( + (a, b) => + (a.name === 'email' ? -1 : 0) - (b.name === 'email' ? 
-1 : 0) || + a.name.localeCompare(b.name, undefined, { sensitivity: 'base' }), + ); +} - return { currentState, preferences }; +/** + * Helper function to get unique identifier names present in a row + * + * @param options - Options + * @param options.row - The current row from CSV file + * @param options.columnToIdentifier - The column to identifier mapping metadata + * @returns The unique identifier names present in the row + */ +export function getUniquePreferenceIdentifierNamesFromRow({ + row, + columnToIdentifier, +}: { + /** The current row from CSV file */ + row: Record; + /** The current file metadata state */ + columnToIdentifier: FileFormatState['columnToIdentifier']; +}): (IdentifierMetadataForPreference & { + /** Column name */ + columnName: string; + /** Value of the identifier in the row */ + value: string; +})[] { + // TODO: https://linear.app/transcend/issue/PIK-285/set-precedence-of-unique-identifiers - remove email logic + // sort email to the front + return Object.entries(columnToIdentifier) + .sort( + ([, a], [, b]) => + (a.name === 'email' ? -1 : 0) - (b.name === 'email' ?
-1 : 0) || + a.name.localeCompare(b.name, undefined, { sensitivity: 'base' }), + ) + .filter(([col]) => row[col] && columnToIdentifier[col].isUniqueOnPreferenceStore) + .map(([col, identifier]) => ({ + ...identifier, + columnName: col, + value: row[col], + })); } -/* eslint-enable no-param-reassign */ diff --git a/packages/cli/src/lib/preference-management/parsePreferenceManagementCsv.ts b/packages/cli/src/lib/preference-management/parsePreferenceManagementCsv.ts index a8118e80..cb89b579 100644 --- a/packages/cli/src/lib/preference-management/parsePreferenceManagementCsv.ts +++ b/packages/cli/src/lib/preference-management/parsePreferenceManagementCsv.ts @@ -1,30 +1,42 @@ import { PersistedState } from '@transcend-io/persisted-state'; +import type { PreferenceQueryResponseItem } from '@transcend-io/privacy-types'; +import type { ObjByString } from '@transcend-io/type-utils'; import colors from 'colors'; import type { Got } from 'got'; -import * as t from 'io-ts'; import { keyBy } from 'lodash-es'; +import type { PreferenceUploadProgress } from '../../commands/consent/upload-preferences/upload/index.js'; import { logger } from '../../logger.js'; -import { PreferenceTopic } from '../graphql/index.js'; -import { readCsv } from '../requests/index.js'; +import { PreferenceTopic, type Identifier } from '../graphql/index.js'; import { checkIfPendingPreferenceUpdatesAreNoOp } from './checkIfPendingPreferenceUpdatesAreNoOp.js'; import { checkIfPendingPreferenceUpdatesCauseConflict } from './checkIfPendingPreferenceUpdatesCauseConflict.js'; -import { FileMetadataState, PreferenceState } from './codecs.js'; +import { + type FileFormatState, + type PendingSafePreferenceUpdates, + type PendingWithConflictPreferenceUpdates, + type RequestUploadReceipts, + type SkippedPreferenceUpdates, +} from './codecs.js'; import { getPreferencesForIdentifiers } from './getPreferencesForIdentifiers.js'; import { getPreferenceUpdatesFromRow } from './getPreferenceUpdatesFromRow.js'; import { 
parsePreferenceAndPurposeValuesFromCsv } from './parsePreferenceAndPurposeValuesFromCsv.js'; -import { parsePreferenceIdentifiersFromCsv } from './parsePreferenceIdentifiersFromCsv.js'; -import { parsePreferenceTimestampsFromCsv } from './parsePreferenceTimestampsFromCsv.js'; +import { parsePreferenceFileFormatFromCsv } from './parsePreferenceFileFormatFromCsv.js'; +import { + getUniquePreferenceIdentifierNamesFromRow, + parsePreferenceIdentifiersFromCsv, +} from './parsePreferenceIdentifiersFromCsv.js'; /** * Parse a file into the cache * * + * @param rawPreferences - The preferences to parse * @param options - Options - * @param cache - The cache to store the parsed file in + * @param schemaState - The schema state to use for parsing the file * @returns The cache with the parsed file */ export async function parsePreferenceManagementCsvWithCache( + rawPreferences: Record[], { file, sombra, @@ -33,6 +45,14 @@ export async function parsePreferenceManagementCsvWithCache( partitionKey, skipExistingRecordCheck, forceTriggerWorkflows, + orgIdentifiers, + allowedIdentifierNames, + identifierColumns, + downloadIdentifierConcurrency, + identifierDownloadLogInterval, + columnsToIgnore, + onProgress, + nonInteractive = false, }: { /** File to parse */ file: string; @@ -48,85 +68,162 @@ export async function parsePreferenceManagementCsvWithCache( skipExistingRecordCheck: boolean; /** Whether to force workflow triggers */ forceTriggerWorkflows: boolean; + /** Identifiers configured for the org */ + orgIdentifiers: Identifier[]; + /** allowed identifiers names */ + allowedIdentifierNames: string[]; + /** Identifier columns on the CSV file */ + identifierColumns: string[]; + /** Columns to ignore in the CSV file */ + columnsToIgnore: string[]; + /** The interval to log upload progress */ + identifierDownloadLogInterval: number; + /** Concurrency for downloading identifiers */ + downloadIdentifierConcurrency: number; + /** on progress callback */ + onProgress?: (info: 
PreferenceUploadProgress) => void; + /** When true, throw instead of prompting (for worker processes) */ + nonInteractive?: boolean; }, - cache: PersistedState, -): Promise { + schemaState: PersistedState, +): Promise<{ + /** Pending safe updates */ + pendingSafeUpdates: PendingSafePreferenceUpdates; + /** Pending conflict updates */ + pendingConflictUpdates: PendingWithConflictPreferenceUpdates; + /** Skipped updates */ + skippedUpdates: SkippedPreferenceUpdates; +}> { // Start the timer const t0 = new Date().getTime(); - // Get the current metadata - const fileMetadata = cache.getValue('fileMetadata'); - - // Read in the file - logger.info(colors.magenta(`Reading in file: "${file}"`)); - let preferences = readCsv(file, t.record(t.string, t.string)); - - // start building the cache, can use previous cache as well - let currentState: FileMetadataState = { - columnToPurposeName: {}, - pendingSafeUpdates: {}, - pendingConflictUpdates: {}, - skippedUpdates: {}, - // Load in the last fetched time - ...((fileMetadata[file] || {}) as Partial), - lastFetchedAt: new Date().toISOString(), - }; - // Validate that all timestamps are present in the file - currentState = await parsePreferenceTimestampsFromCsv(preferences, currentState); - fileMetadata[file] = currentState; - await cache.setValue(fileMetadata, 'fileMetadata'); + await parsePreferenceFileFormatFromCsv(rawPreferences, schemaState, { + nonInteractive, + }); // Validate that all identifiers are present and unique - const result = await parsePreferenceIdentifiersFromCsv(preferences, currentState); - currentState = result.currentState; - preferences = result.preferences; - fileMetadata[file] = currentState; - await cache.setValue(fileMetadata, 'fileMetadata'); - - // Ensure all other columns are mapped to purpose and preference - // slug values - currentState = await parsePreferenceAndPurposeValuesFromCsv(preferences, currentState, { + const result = await parsePreferenceIdentifiersFromCsv(rawPreferences, {
schemaState, + orgIdentifiers, + allowedIdentifierNames, + identifierColumns, + nonInteractive, + }); + const { preferences } = result; + + // Ensure all other columns are mapped to purpose and preference slug values + await parsePreferenceAndPurposeValuesFromCsv(preferences, schemaState, { preferenceTopics, purposeSlugs, forceTriggerWorkflows, + columnsToIgnore, + nonInteractive, }); - fileMetadata[file] = currentState; - await cache.setValue(fileMetadata, 'fileMetadata'); // Grab existing preference store records - const identifiers = preferences.map((pref) => pref[currentState.identifierColumn!]); + const currentColumnToIdentifierMap = schemaState.getValue('columnToIdentifier'); + const currentColumnToPurposeName = schemaState.getValue('columnToPurposeName'); + const identifiers = preferences.flatMap((pref) => + getUniquePreferenceIdentifierNamesFromRow({ + row: pref, + columnToIdentifier: currentColumnToIdentifierMap, + }), + ); + const existingConsentRecords = skipExistingRecordCheck ? 
[] : await getPreferencesForIdentifiers(sombra, { - identifiers: identifiers.map((x) => ({ value: x })), + identifiers, + logInterval: identifierDownloadLogInterval, partitionKey, + concurrency: downloadIdentifierConcurrency, + onProgress, }); - const consentRecordByIdentifier = keyBy(existingConsentRecords, 'userId'); + + // Create a map of all unique identifiers to consent records + const uniqueIdentifiers = Object.values(currentColumnToIdentifierMap) + .filter((x) => x.isUniqueOnPreferenceStore) + .map((x) => x.name); + const consentRecordByUniqueIdentifiers = uniqueIdentifiers.reduce( + (acc, identifier) => { + const recordsWithIdentifier = existingConsentRecords.filter((record) => + (record.identifiers || []).some((id) => id.name === identifier && id.value), + ); + acc[identifier] = keyBy( + recordsWithIdentifier, + (record) => (record.identifiers || []).find((id) => id.name === identifier)?.value || '', + ); + return acc; + }, + {} as Record>, + ); // Clear out previous updates - currentState.pendingConflictUpdates = {}; - currentState.pendingSafeUpdates = {}; - currentState.skippedUpdates = {}; + const pendingConflictUpdates: RequestUploadReceipts['pendingConflictUpdates'] = {}; + const pendingSafeUpdates: Record> = {}; + const skippedUpdates: RequestUploadReceipts['skippedUpdates'] = {}; // Process each row - preferences.forEach((pref) => { - // Grab unique Id for the user - const userId = pref[currentState.identifierColumn!]; + const seenAlready: Record = {}; + logger.log( + colors.green( + `Processing ${preferences.length} preferences with ${ + Object.keys(currentColumnToIdentifierMap).length + } identifiers and ${Object.keys(currentColumnToPurposeName).length} purposes.`, + ), + ); + preferences.forEach((pref, ind) => { + // Get the userIds that could be the primary key of the consent record + const uniqueIdentifiers = getUniquePreferenceIdentifierNamesFromRow({ + row: pref, + columnToIdentifier: currentColumnToIdentifierMap, + }); // determine updates 
for user const pendingUpdates = getPreferenceUpdatesFromRow({ row: pref, - columnToPurposeName: currentState.columnToPurposeName, + columnToPurposeName: currentColumnToPurposeName, preferenceTopics, purposeSlugs, }); // Grab current state of the update - const currentConsentRecord = consentRecordByIdentifier[userId]; + const primaryKeyMetadata = uniqueIdentifiers[0]; + const currentConsentRecord = + consentRecordByUniqueIdentifiers[primaryKeyMetadata.name][primaryKeyMetadata.value]; + // If consent record is found use it, otherwise use the first unique identifier + let primaryKey = primaryKeyMetadata.value; + // Ensure this is unique + if (seenAlready[primaryKey]) { + if (!Object.entries(pref).every(([key, value]) => seenAlready[primaryKey][key] === value)) { + // Show a diff of what's changed between the duplicate rows + const previous = seenAlready[primaryKey]; + const diffs = Object.entries(pref) + .filter(([key, value]) => previous[key] !== value) + .map(([key]) => key) + .join(', '); + logger.warn( + colors.yellow(`Duplicate primary key "${primaryKey}" at index ${ind}. Diff: ${diffs}`), + ); + primaryKey = `${primaryKey}___${ind}`; + } else { + skippedUpdates[`${primaryKey}___${ind}`] = pref; + logger.warn( + colors.yellow( + `Duplicate primary key found: "${primaryKey}" at index: "${ind}" but rows are identical.`, + ), + ); + return; + } + } + seenAlready[primaryKey] = pref; + if (forceTriggerWorkflows && !currentConsentRecord) { throw new Error( - `No existing consent record found for user with id: ${userId}. + `No existing consent record found for user with ids: ${uniqueIdentifiers + .map((x) => x.value) + .join(', ')}. 
When 'forceTriggerWorkflows' is set all the user identifiers should contain a consent record`, ); } @@ -142,7 +239,7 @@ export async function parsePreferenceManagementCsvWithCache( }) && !forceTriggerWorkflows ) { - currentState.skippedUpdates[userId] = pref; + skippedUpdates[primaryKey] = pref; return; } @@ -153,9 +250,10 @@ export async function parsePreferenceManagementCsvWithCache( currentConsentRecord, pendingUpdates, preferenceTopics, + log: false, // update this to log for debugging purposes }) ) { - currentState.pendingConflictUpdates[userId] = { + pendingConflictUpdates[primaryKey] = { row: pref, record: currentConsentRecord, }; @@ -163,12 +261,15 @@ export async function parsePreferenceManagementCsvWithCache( } // Add to pending updates - currentState.pendingSafeUpdates[userId] = pref; + pendingSafeUpdates[primaryKey] = pref; }); - // Read in the file - fileMetadata[file] = currentState; - await cache.setValue(fileMetadata, 'fileMetadata'); const t1 = new Date().getTime(); logger.info(colors.green(`Successfully pre-processed file: "${file}" in ${(t1 - t0) / 1000}s`)); + + return { + pendingSafeUpdates, + pendingConflictUpdates, + skippedUpdates, + }; } diff --git a/packages/cli/src/lib/preference-management/tests/fetchConsentPreferencesChunked.test.ts b/packages/cli/src/lib/preference-management/tests/fetchConsentPreferencesChunked.test.ts index e7824470..ab465132 100644 --- a/packages/cli/src/lib/preference-management/tests/fetchConsentPreferencesChunked.test.ts +++ b/packages/cli/src/lib/preference-management/tests/fetchConsentPreferencesChunked.test.ts @@ -47,7 +47,7 @@ const H = vi.hoisted(() => ({ // Each call to iterateConsentPages will shift one generator from here: iterators: [] as Array>, makeIter: (pages: PreferenceQueryResponseItem[][]) => - // eslint-disable-next-line wrap-iife + // eslint-disable-next-line wrap-iife, func-names (async function* () { for (const p of pages) yield p; })(), diff --git 
a/packages/cli/src/lib/preference-management/tests/getPreferencesForIdentifiers.test.ts b/packages/cli/src/lib/preference-management/tests/getPreferencesForIdentifiers.test.ts index 56ca3d21..a7d331f6 100644 --- a/packages/cli/src/lib/preference-management/tests/getPreferencesForIdentifiers.test.ts +++ b/packages/cli/src/lib/preference-management/tests/getPreferencesForIdentifiers.test.ts @@ -63,9 +63,13 @@ vi.mock('../../bluebird.js', () => ({ })); // decodeCodec should just return what we expect to consume -vi.mock('@transcend-io/type-utils', () => ({ - decodeCodec: vi.fn((_codec, raw) => raw), -})); +vi.mock('@transcend-io/type-utils', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + decodeCodec: vi.fn((_codec, raw) => raw), + }; +}); // withPreferenceRetry should invoke the provided fn and return its result, // but we still want to see that it's being called. @@ -93,6 +97,7 @@ describe('getPreferencesForIdentifiers', () => { // Build 250 identifiers -> 3 groups: 100, 100, 50 const identifiers = Array.from({ length: 250 }, (_, i) => ({ value: `user-${i + 1}@ex.com`, + name: 'email', })); // Fake Got client with post().json() chain that returns a result based on the requested group @@ -114,8 +119,6 @@ describe('getPreferencesForIdentifiers', () => { name: string; }[]; }; - /** Limit */ - limit: number; }; }, ) => { @@ -155,17 +158,14 @@ describe('getPreferencesForIdentifiers', () => { const call2Json = postMock.mock.calls[1][1].json; const call3Json = postMock.mock.calls[2][1].json; - expect(call1Json.limit).toBe(100); expect(call1Json.filter.identifiers).toHaveLength(100); expect(call1Json.filter.identifiers[0].value).toBe('user-1@ex.com'); expect(call1Json.filter.identifiers[99].value).toBe('user-100@ex.com'); - expect(call2Json.limit).toBe(100); expect(call2Json.filter.identifiers).toHaveLength(100); expect(call2Json.filter.identifiers[0].value).toBe('user-101@ex.com'); 
expect(call2Json.filter.identifiers[99].value).toBe('user-200@ex.com'); - expect(call3Json.limit).toBe(50); expect(call3Json.filter.identifiers).toHaveLength(50); expect(call3Json.filter.identifiers[0].value).toBe('user-201@ex.com'); expect(call3Json.filter.identifiers[49].value).toBe('user-250@ex.com'); @@ -174,11 +174,10 @@ describe('getPreferencesForIdentifiers', () => { expect(out).toHaveLength(250); expect(out).toHaveLength(250); - // Progress bar: start not called when skipLogging=true, but update/stop still invoked + // Progress bar is not used by the current implementation. expect(H.progressBar.start).not.toHaveBeenCalled(); - // We update once per group - expect(H.progressBar.update).toHaveBeenCalledTimes(3); - expect(H.progressBar.stop).toHaveBeenCalledTimes(1); + expect(H.progressBar.update).not.toHaveBeenCalled(); + expect(H.progressBar.stop).not.toHaveBeenCalled(); // Logger.info only at the end when !skipLogging, so not in this test expect(H.loggerSpies.info).not.toHaveBeenCalled(); @@ -195,6 +194,7 @@ describe('getPreferencesForIdentifiers', () => { it('logs progress start and completion when skipLogging=false', async () => { const identifiers = Array.from({ length: 5 }, (_, i) => ({ value: `u${i + 1}`, + name: 'test-id', })); const postMock = vi.fn( @@ -215,8 +215,6 @@ describe('getPreferencesForIdentifiers', () => { name: string; }[]; }; - /** Limit */ - limit: number; }; }, ) => { @@ -246,9 +244,8 @@ describe('getPreferencesForIdentifiers', () => { }); expect(out).toHaveLength(5); - expect(H.progressBar.start).toHaveBeenCalledTimes(1); - expect(H.progressBar.start).toHaveBeenCalledWith(5, 0); - expect(H.progressBar.stop).toHaveBeenCalledTimes(1); + expect(H.progressBar.start).not.toHaveBeenCalled(); + expect(H.progressBar.stop).not.toHaveBeenCalled(); // Completion info log called once expect(H.loggerSpies.info).toHaveBeenCalledTimes(1); diff --git a/packages/cli/src/lib/preference-management/uploadPreferenceManagementPreferencesInteractive.ts 
b/packages/cli/src/lib/preference-management/uploadPreferenceManagementPreferencesInteractive.ts deleted file mode 100644 index a462f309..00000000 --- a/packages/cli/src/lib/preference-management/uploadPreferenceManagementPreferencesInteractive.ts +++ /dev/null @@ -1,267 +0,0 @@ -import { PersistedState } from '@transcend-io/persisted-state'; -import { PreferenceUpdateItem } from '@transcend-io/privacy-types'; -import { apply } from '@transcend-io/type-utils'; -import cliProgress from 'cli-progress'; -import colors from 'colors'; -import { chunk } from 'lodash-es'; - -import { logger } from '../../logger.js'; -import { map } from '../bluebird.js'; -import { - buildTranscendGraphQLClient, - createSombraGotInstance, - fetchAllPurposes, - fetchAllPreferenceTopics, -} from '../graphql/index.js'; -import { parseAttributesFromString } from '../requests/index.js'; -import { PreferenceState } from './codecs.js'; -import { getPreferenceUpdatesFromRow } from './getPreferenceUpdatesFromRow.js'; -import { parsePreferenceManagementCsvWithCache } from './parsePreferenceManagementCsv.js'; -import { NONE_PREFERENCE_MAP } from './parsePreferenceTimestampsFromCsv.js'; - -/** - * Upload a set of consent preferences - * - * @param options - Options - */ -export async function uploadPreferenceManagementPreferencesInteractive({ - auth, - sombraAuth, - receiptFilepath, - file, - partition, - isSilent = true, - dryRun = false, - skipWorkflowTriggers = false, - skipConflictUpdates = false, - skipExistingRecordCheck = false, - attributes = [], - transcendUrl, - forceTriggerWorkflows = false, -}: { - /** The Transcend API key */ - auth: string; - /** Sombra API key authentication */ - sombraAuth?: string; - /** Partition key */ - partition: string; - /** File where to store receipt and continue from where left off */ - receiptFilepath: string; - /** The file to process */ - file: string; - /** API URL for Transcend backend */ - transcendUrl: string; - /** Whether to do a dry run */ - 
dryRun?: boolean; - /** Whether to upload as isSilent */ - isSilent?: boolean; - /** Attributes string pre-parse. In format Key:Value */ - attributes?: string[]; - /** Skip workflow triggers */ - skipWorkflowTriggers?: boolean; - /** - * When true, only update preferences that do not conflict with existing - * preferences. When false, update all preferences in CSV based on timestamp. - */ - skipConflictUpdates?: boolean; - /** Whether to skip the check for existing records. SHOULD ONLY BE USED FOR INITIAL UPLOAD */ - skipExistingRecordCheck?: boolean; - /** Whether to force trigger workflows */ - forceTriggerWorkflows?: boolean; -}): Promise { - // Parse out the extra attributes to apply to all requests uploaded - const parsedAttributes = parseAttributesFromString(attributes); - - // Create a new state file to store the requests from this run - const preferenceState = new PersistedState(receiptFilepath, PreferenceState, { - fileMetadata: {}, - failingUpdates: {}, - pendingUpdates: {}, - }); - const failingRequests = preferenceState.getValue('failingUpdates'); - const pendingRequests = preferenceState.getValue('pendingUpdates'); - let fileMetadata = preferenceState.getValue('fileMetadata'); - - logger.info( - colors.magenta( - 'Restored cache, there are: \n' + - `${Object.values(failingRequests).length} failing requests to be retried\n` + - `${Object.values(pendingRequests).length} pending requests to be processed\n` + - `The following files are stored in cache and will be used:\n${Object.keys(fileMetadata) - .map((x) => x) - .join('\n')}\n` + - `The following file will be processed: ${file}\n`, - ), - ); - - // Create GraphQL client to connect to Transcend backend - const client = buildTranscendGraphQLClient(transcendUrl, auth); - - const [sombra, purposes, preferenceTopics] = await Promise.all([ - // Create sombra instance to communicate with - createSombraGotInstance(transcendUrl, auth, sombraAuth), - // get all purposes and topics - fetchAllPurposes(client), - 
fetchAllPreferenceTopics(client), - ]); - - // Process the file - await parsePreferenceManagementCsvWithCache( - { - file, - purposeSlugs: purposes.map((x) => x.trackingType), - preferenceTopics, - sombra, - partitionKey: partition, - skipExistingRecordCheck, - forceTriggerWorkflows, - }, - preferenceState, - ); - - // Construct the pending updates - const pendingUpdates: Record = {}; - fileMetadata = preferenceState.getValue('fileMetadata'); - const metadata = fileMetadata[file]; - - logger.info( - colors.magenta( - `Found ${Object.entries(metadata.pendingSafeUpdates).length} safe updates in ${file}`, - ), - ); - logger.info( - colors.magenta( - `Found ${Object.entries(metadata.pendingConflictUpdates).length} conflict updates in ${file}`, - ), - ); - logger.info( - colors.magenta( - `Found ${Object.entries(metadata.skippedUpdates).length} skipped updates in ${file}`, - ), - ); - - // Update either safe updates only or safe + conflict - Object.entries({ - ...metadata.pendingSafeUpdates, - ...(skipConflictUpdates ? {} : apply(metadata.pendingConflictUpdates, ({ row }) => row)), - }).forEach(([userId, update]) => { - // Determine timestamp - const timestamp = - metadata.timestampColum === NONE_PREFERENCE_MAP - ? new Date() - : new Date(update[metadata.timestampColum!]); - - // Determine updates - const updates = getPreferenceUpdatesFromRow({ - row: update, - columnToPurposeName: metadata.columnToPurposeName, - preferenceTopics, - purposeSlugs: purposes.map((x) => x.trackingType), - }); - pendingUpdates[userId] = { - userId, - partition, - timestamp: timestamp.toISOString(), - purposes: Object.entries(updates).map(([purpose, value]) => ({ - ...value, - purpose, - workflowSettings: { - attributes: parsedAttributes, - isSilent, - skipWorkflowTrigger: skipWorkflowTriggers, - ...(forceTriggerWorkflows ? 
{ forceTriggerWorkflow: forceTriggerWorkflows } : {}), - }, - })), - }; - }); - await preferenceState.setValue(pendingUpdates, 'pendingUpdates'); - await preferenceState.setValue({}, 'failingUpdates'); - - // Exist early if dry run - if (dryRun) { - logger.info( - colors.green( - `Dry run complete, exiting. ${ - Object.values(pendingUpdates).length - } pending updates. Check file: ${receiptFilepath}`, - ), - ); - return; - } - - logger.info( - colors.magenta( - `Uploading ${Object.values(pendingUpdates).length} preferences to partition: ${partition}`, - ), - ); - - // Time duration - const t0 = new Date().getTime(); - - // create a new progress bar instance and use shades_classic theme - const progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic); - - // Build a GraphQL client - let total = 0; - const updatesToRun = Object.entries(pendingUpdates); - const chunkedUpdates = chunk(updatesToRun, skipWorkflowTriggers ? 100 : 10); - progressBar.start(updatesToRun.length, 0); - await map( - chunkedUpdates, - async (currentChunk) => { - // Make the request - try { - await sombra - .put('v1/preferences', { - json: { - records: currentChunk.map(([, update]) => update), - skipWorkflowTriggers, - }, - }) - .json(); - } catch (err) { - try { - const parsed = JSON.parse(err?.response?.body || '{}'); - if (parsed.error) { - logger.error(colors.red(`Error: ${parsed.error}`)); - } - } catch (e) { - // continue - } - logger.error( - colors.red( - `Failed to upload ${currentChunk.length} user preferences to partition ${partition}: ${ - err?.response?.body || err?.message - }`, - ), - ); - const failingUpdates = preferenceState.getValue('failingUpdates'); - currentChunk.forEach(([userId, update]) => { - failingUpdates[userId] = { - uploadedAt: new Date().toISOString(), - update, - error: err?.response?.body || err?.message || 'Unknown error', - }; - }); - await preferenceState.setValue(failingUpdates, 'failingUpdates'); - } - - total += currentChunk.length; - 
progressBar.update(total); - }, - { - concurrency: 40, - }, - ); - - progressBar.stop(); - const t1 = new Date().getTime(); - const totalTime = t1 - t0; - logger.info( - colors.green( - `Successfully uploaded ${ - updatesToRun.length - } user preferences to partition ${partition} in "${totalTime / 1000}" seconds!`, - ), - ); -} diff --git a/packages/cli/src/lib/tests/codebase.test.ts b/packages/cli/src/lib/tests/codebase.test.ts index 43084f08..d412a5ea 100644 --- a/packages/cli/src/lib/tests/codebase.test.ts +++ b/packages/cli/src/lib/tests/codebase.test.ts @@ -204,7 +204,15 @@ describe('CLI Command Structure', () => { test('No unexpected files in command directories', () => { // Required + optional files in leaf command dirs const requiredFiles = ['command.ts', 'impl.ts']; - const optionalFiles = ['readme.ts', 'helpers.ts', 'types.ts', 'worker.ts', 'constants.ts']; + const optionalFiles = [ + 'readme.ts', + 'helpers.ts', + 'types.ts', + 'worker.ts', + 'constants.ts', + 'buildTaskOptions.ts', + 'schemaState.ts', + ]; // Allowed subdirectories in leaf command dirs const allowedDirs = ['artifacts', 'ui', 'upload', 'tests', '__mocks__', '__snapshots__']; From de2cb542298a44201dcb8913cb343ccf584ed527 Mon Sep 17 00:00:00 2001 From: Michael Farrell Date: Sat, 28 Mar 2026 12:23:43 -0700 Subject: [PATCH 02/10] Handles directory --- mise.lock | 22 +++++++++++++++---- .../consent/upload-preferences/impl.ts | 6 ++++- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/mise.lock b/mise.lock index 8c6c242f..75871b58 100644 --- a/mise.lock +++ b/mise.lock @@ -1,7 +1,21 @@ +# @generated - this file is auto-generated by `mise lock` https://mise.jdx.dev/dev-tools/mise-lock.html + [[tools.node]] version = "22.22.0" backend = "core:node" -"platforms.linux-arm64" = { checksum = "sha256:25ba95dfb96871fa2ef977f11f95ea90818c8fa15c0f2110771db08d4ba423be", url = "https://nodejs.org/dist/v22.22.0/node-v22.22.0-linux-arm64.tar.gz"} -"platforms.linux-x64" = { checksum = 
"sha256:c33c39ed9c80deddde77c960d00119918b9e352426fd604ba41638d6526a4744", url = "https://nodejs.org/dist/v22.22.0/node-v22.22.0-linux-x64.tar.gz"} -"platforms.macos-arm64" = { checksum = "sha256:5ed4db0fcf1eaf84d91ad12462631d73bf4576c1377e192d222e48026a902640", url = "https://nodejs.org/dist/v22.22.0/node-v22.22.0-darwin-arm64.tar.gz"} -"platforms.macos-x64" = { checksum = "sha256:5ea50c9d6dea3dfa3abb66b2656f7a4e1c8cef23432b558d45fb538c7b5dedce", url = "https://nodejs.org/dist/v22.22.0/node-v22.22.0-darwin-x64.tar.gz"} + +[tools.node."platforms.linux-arm64"] +checksum = "sha256:25ba95dfb96871fa2ef977f11f95ea90818c8fa15c0f2110771db08d4ba423be" +url = "https://nodejs.org/dist/v22.22.0/node-v22.22.0-linux-arm64.tar.gz" + +[tools.node."platforms.linux-x64"] +checksum = "sha256:c33c39ed9c80deddde77c960d00119918b9e352426fd604ba41638d6526a4744" +url = "https://nodejs.org/dist/v22.22.0/node-v22.22.0-linux-x64.tar.gz" + +[tools.node."platforms.macos-arm64"] +checksum = "sha256:5ed4db0fcf1eaf84d91ad12462631d73bf4576c1377e192d222e48026a902640" +url = "https://nodejs.org/dist/v22.22.0/node-v22.22.0-darwin-arm64.tar.gz" + +[tools.node."platforms.macos-x64"] +checksum = "sha256:5ea50c9d6dea3dfa3abb66b2656f7a4e1c8cef23432b558d45fb538c7b5dedce" +url = "https://nodejs.org/dist/v22.22.0/node-v22.22.0-darwin-x64.tar.gz" diff --git a/packages/cli/src/commands/consent/upload-preferences/impl.ts b/packages/cli/src/commands/consent/upload-preferences/impl.ts index 50932196..e4a8bd09 100644 --- a/packages/cli/src/commands/consent/upload-preferences/impl.ts +++ b/packages/cli/src/commands/consent/upload-preferences/impl.ts @@ -93,7 +93,11 @@ function getCurrentModulePath(): string { if (typeof __filename !== 'undefined') { return __filename as unknown as string; } - return process.argv[1]; + try { + return new URL(import.meta.url).pathname; + } catch { + return process.argv[1]; + } } export interface UploadPreferencesCommandFlags { From 09843cd46cf59bc150ab4259ec44188a77db7221 Mon Sep 17 
00:00:00 2001 From: Michael Farrell Date: Sat, 28 Mar 2026 14:47:35 -0700 Subject: [PATCH 03/10] MErgeS --- .../upload-preferences/artifacts/receipts/receiptsState.ts | 2 +- packages/cli/src/commands/consent/upload-preferences/impl.ts | 3 ++- .../cli/src/commands/consent/upload-preferences/schemaState.ts | 2 +- .../upload-preferences/upload/buildInteractiveUploadPlan.ts | 2 +- .../upload/interactivePreferenceUploaderFromPlan.ts | 2 +- .../lib/preference-management/getPreferencesForIdentifiers.ts | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/receiptsState.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/receiptsState.ts index abd5c735..c7975fbb 100644 --- a/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/receiptsState.ts +++ b/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/receiptsState.ts @@ -1,6 +1,6 @@ import { PersistedState } from '@transcend-io/persisted-state'; +import { retrySamePromise, type RetryPolicy } from '@transcend-io/utils'; -import { retrySamePromise, type RetryPolicy } from '../../../../../lib/helpers/retrySamePromise.js'; import { RequestUploadReceipts, type FailingPreferenceUpdates, diff --git a/packages/cli/src/commands/consent/upload-preferences/impl.ts b/packages/cli/src/commands/consent/upload-preferences/impl.ts index e4a8bd09..649e7e9b 100644 --- a/packages/cli/src/commands/consent/upload-preferences/impl.ts +++ b/packages/cli/src/commands/consent/upload-preferences/impl.ts @@ -1,11 +1,11 @@ import { statSync, existsSync } from 'node:fs'; import { join } from 'node:path'; +import { chunkOneCsvFile } from '@transcend-io/utils'; import colors from 'colors'; import type { LocalContext } from '../../../context.js'; import { doneInputValidation } from '../../../lib/cli/done-input-validation.js'; -import { chunkOneCsvFile } from '../../../lib/helpers/chunkOneCsvFile.js'; 
import { collectCsvFilesOrExit } from '../../../lib/helpers/collectCsvFilesOrExit.js'; import { computePoolSize, @@ -187,6 +187,7 @@ export async function uploadPreferences( outputDir: directory, clearOutputDir: false, chunkSizeMB, + logger, // eslint-disable-next-line @typescript-eslint/no-empty-function onProgress: () => {}, }); diff --git a/packages/cli/src/commands/consent/upload-preferences/schemaState.ts b/packages/cli/src/commands/consent/upload-preferences/schemaState.ts index cf1ea894..2b09ddf1 100644 --- a/packages/cli/src/commands/consent/upload-preferences/schemaState.ts +++ b/packages/cli/src/commands/consent/upload-preferences/schemaState.ts @@ -1,6 +1,6 @@ import { PersistedState } from '@transcend-io/persisted-state'; +import { retrySamePromise, type RetryPolicy } from '@transcend-io/utils'; -import { retrySamePromise, type RetryPolicy } from '../../../lib/helpers/retrySamePromise.js'; import { FileFormatState, type ColumnIdentifierMap, diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts b/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts index 99631fa8..4f8089b4 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts @@ -1,10 +1,10 @@ +import { limitRecords } from '@transcend-io/utils'; import colors from 'colors'; import type { Got } from 'got'; import type { GraphQLClient } from 'graphql-request'; import * as t from 'io-ts'; import type { FormattedAttribute } from '../../../../lib/graphql/formatAttributeValues.js'; -import { limitRecords } from '../../../../lib/helpers/index.js'; import type { FileFormatState, PendingSafePreferenceUpdates, diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts 
b/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts index 019e88c7..baddf974 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts @@ -1,4 +1,5 @@ import type { PreferenceUpdateItem } from '@transcend-io/privacy-types'; +import { extractErrorMessage, limitRecords } from '@transcend-io/utils'; import Bluebird from 'bluebird'; /* eslint-disable no-param-reassign */ import colors from 'colors'; @@ -6,7 +7,6 @@ import type { Got } from 'got'; import { chunk, groupBy } from 'lodash-es'; import { RETRYABLE_BATCH_STATUSES } from '../../../../constants.js'; -import { extractErrorMessage, limitRecords } from '../../../../lib/helpers/index.js'; import { logger } from '../../../../logger.js'; import type { PreferenceReceiptsInterface } from '../artifacts/receipts/index.js'; import { uploadChunkWithSplit } from './batchUploader.js'; diff --git a/packages/cli/src/lib/preference-management/getPreferencesForIdentifiers.ts b/packages/cli/src/lib/preference-management/getPreferencesForIdentifiers.ts index 05c54d09..4571b095 100644 --- a/packages/cli/src/lib/preference-management/getPreferencesForIdentifiers.ts +++ b/packages/cli/src/lib/preference-management/getPreferencesForIdentifiers.ts @@ -1,5 +1,6 @@ import { PreferenceQueryResponseItem } from '@transcend-io/privacy-types'; import { decodeCodec } from '@transcend-io/type-utils'; +import { extractErrorMessage, splitInHalf } from '@transcend-io/utils'; import colors from 'colors'; import type { Got } from 'got'; import { chunk } from 'lodash-es'; @@ -7,7 +8,6 @@ import { chunk } from 'lodash-es'; import type { PreferenceUploadProgress } from '../../commands/consent/upload-preferences/upload/index.js'; import { logger } from '../../logger.js'; import { map } from '../bluebird.js'; -import { extractErrorMessage, 
splitInHalf } from '../helpers/index.js'; import { ConsentPreferenceResponse } from './types.js'; import { withPreferenceRetry } from './withPreferenceRetry.js'; From f965083948e49c742addf67c0ba7dc5c233efad0 Mon Sep 17 00:00:00 2001 From: Michael Farrell Date: Sat, 28 Mar 2026 17:33:08 -0700 Subject: [PATCH 04/10] Fix imports: use @transcend-io/sdk directly instead of graphql barrel re-exports Made-with: Cursor --- .../consent/configure-preference-upload/impl.ts | 2 +- .../upload/transform/buildPendingUpdates.ts | 7 ++----- .../commands/consent/upload-preferences/worker.ts | 15 ++++++--------- .../parsePreferenceIdentifiersFromCsv.ts | 2 +- .../parsePreferenceManagementCsv.ts | 2 +- 5 files changed, 11 insertions(+), 17 deletions(-) diff --git a/packages/cli/src/commands/consent/configure-preference-upload/impl.ts b/packages/cli/src/commands/consent/configure-preference-upload/impl.ts index 1c051446..c4e1f6bf 100644 --- a/packages/cli/src/commands/consent/configure-preference-upload/impl.ts +++ b/packages/cli/src/commands/consent/configure-preference-upload/impl.ts @@ -1,6 +1,7 @@ import { createReadStream } from 'node:fs'; import { PersistedState } from '@transcend-io/persisted-state'; +import { buildTranscendGraphQLClient } from '@transcend-io/sdk'; import colors from 'colors'; import { parse as csvParse } from 'csv-parse'; import inquirer from 'inquirer'; @@ -8,7 +9,6 @@ import * as t from 'io-ts'; import type { LocalContext } from '../../../context.js'; import { doneInputValidation } from '../../../lib/cli/done-input-validation.js'; -import { buildTranscendGraphQLClient } from '../../../lib/graphql/index.js'; import { collectCsvFilesOrExit } from '../../../lib/helpers/collectCsvFilesOrExit.js'; import { FileFormatState, diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts b/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts index 8bda5a1e..5c03269d 100644 --- 
a/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts @@ -5,12 +5,9 @@ * PreferenceUpdateItem payloads, ready for upload. */ import type { PreferenceUpdateItem } from '@transcend-io/privacy-types'; +import type { PreferenceTopic, Purpose } from '@transcend-io/sdk'; -import type { - PreferenceTopic, - FormattedAttribute, - Purpose, -} from '../../../../../lib/graphql/index.js'; +import type { FormattedAttribute } from '../../../../../lib/graphql/index.js'; import { getPreferenceIdentifiersFromRow, getPreferenceUpdatesFromRow, diff --git a/packages/cli/src/commands/consent/upload-preferences/worker.ts b/packages/cli/src/commands/consent/upload-preferences/worker.ts index a5550d61..96055f39 100644 --- a/packages/cli/src/commands/consent/upload-preferences/worker.ts +++ b/packages/cli/src/commands/consent/upload-preferences/worker.ts @@ -1,10 +1,8 @@ import { mkdirSync, createWriteStream } from 'node:fs'; import { join, dirname } from 'node:path'; -import { - buildTranscendGraphQLClient, - createSombraGotInstance, -} from '../../../lib/graphql/index.js'; +import { buildTranscendGraphQLClient, createSombraGotInstance } from '@transcend-io/sdk'; + import type { ToWorker } from '../../../lib/pooling/index.js'; import { splitCsvToList } from '../../../lib/requests/index.js'; import { logger } from '../../../logger.js'; @@ -75,11 +73,10 @@ export async function runChild(): Promise { const receipts = await makeReceiptsState(receiptFilepath); const schema = await makeSchemaState(options.schemaFile); const client = buildTranscendGraphQLClient(options.transcendUrl, options.auth); - const sombra = await createSombraGotInstance( - options.transcendUrl, - options.auth, - options.sombraAuth, - ); + const sombra = await createSombraGotInstance(options.transcendUrl, options.auth, { + logger, + sombraApiKey: options.sombraAuth, + }); // Derive 
identifierColumns and columnsToIgnore from config const columnToIdentifier = schema.getColumnToIdentifier(); diff --git a/packages/cli/src/lib/preference-management/parsePreferenceIdentifiersFromCsv.ts b/packages/cli/src/lib/preference-management/parsePreferenceIdentifiersFromCsv.ts index c1ed47cb..a730f4d1 100644 --- a/packages/cli/src/lib/preference-management/parsePreferenceIdentifiersFromCsv.ts +++ b/packages/cli/src/lib/preference-management/parsePreferenceIdentifiersFromCsv.ts @@ -1,5 +1,6 @@ import type { PersistedState } from '@transcend-io/persisted-state'; import type { PreferenceStoreIdentifier } from '@transcend-io/privacy-types'; +import type { Identifier } from '@transcend-io/sdk'; import Bluebird from 'bluebird'; import colors from 'colors'; import inquirer from 'inquirer'; @@ -7,7 +8,6 @@ import inquirer from 'inquirer'; import { uniq, keyBy } from 'lodash-es'; import { logger } from '../../logger.js'; -import type { Identifier } from '../graphql/index.js'; import { inquirerConfirmBoolean } from '../helpers/index.js'; import type { FileFormatState, IdentifierMetadataForPreference } from './codecs.js'; diff --git a/packages/cli/src/lib/preference-management/parsePreferenceManagementCsv.ts b/packages/cli/src/lib/preference-management/parsePreferenceManagementCsv.ts index cb89b579..0b26ce9c 100644 --- a/packages/cli/src/lib/preference-management/parsePreferenceManagementCsv.ts +++ b/packages/cli/src/lib/preference-management/parsePreferenceManagementCsv.ts @@ -1,5 +1,6 @@ import { PersistedState } from '@transcend-io/persisted-state'; import type { PreferenceQueryResponseItem } from '@transcend-io/privacy-types'; +import type { Identifier, PreferenceTopic } from '@transcend-io/sdk'; import type { ObjByString } from '@transcend-io/type-utils'; import colors from 'colors'; import type { Got } from 'got'; @@ -7,7 +8,6 @@ import { keyBy } from 'lodash-es'; import type { PreferenceUploadProgress } from 
'../../commands/consent/upload-preferences/upload/index.js'; import { logger } from '../../logger.js'; -import { PreferenceTopic, type Identifier } from '../graphql/index.js'; import { checkIfPendingPreferenceUpdatesAreNoOp } from './checkIfPendingPreferenceUpdatesAreNoOp.js'; import { checkIfPendingPreferenceUpdatesCauseConflict } from './checkIfPendingPreferenceUpdatesCauseConflict.js'; import { From f8325bccd771cc033e65f7b03d16abd8cd27a11b Mon Sep 17 00:00:00 2001 From: Michael Farrell Date: Sat, 28 Mar 2026 21:40:37 -0700 Subject: [PATCH 05/10] Phase 2c: Reconcile duplicated preference-management files Delete 6 CLI files that were duplicated in SDK after the merge: - codecs.ts, getPreferenceMetadataFromRow.ts (identical) - checkIfPendingPreferenceUpdatesAreNoOp.ts, checkIfPendingPreferenceUpdatesCauseConflict.ts, getPreferenceUpdatesFromRow.ts (only import path diffs) - getPreferencesForIdentifiers.ts (ported enhanced split-on-validation logic into SDK version with Logger DI) Move PreferenceUploadProgress type to SDK types.ts so SDK getPreferencesForIdentifiers can use it. Update all CLI consumers to import from @transcend-io/sdk directly. 
SDK typecheck pass | CLI typecheck pass | 86 tests pass Made-with: Cursor --- .../configure-preference-upload/impl.ts | 2 +- .../artifacts/receipts/receiptsState.ts | 2 +- .../consent/upload-preferences/schemaState.ts | 2 +- .../upload/buildInteractiveUploadPlan.ts | 2 +- .../upload/transform/buildPendingUpdates.ts | 13 +- .../upload-preferences/upload/types.ts | 9 +- .../checkIfPendingPreferenceUpdatesAreNoOp.ts | 82 ----- ...IfPendingPreferenceUpdatesCauseConflict.ts | 134 --------- .../src/lib/preference-management/codecs.ts | 283 ------------------ .../getPreferenceMetadataFromRow.ts | 40 --- .../getPreferenceUpdatesFromRow.ts | 257 ---------------- .../getPreferencesForIdentifiers.ts | 201 ------------- .../src/lib/preference-management/index.ts | 6 - .../parsePreferenceAndPurposeValuesFromCsv.ts | 2 +- .../parsePreferenceFileFormatFromCsv.ts | 2 +- .../parsePreferenceIdentifiersFromCsv.ts | 2 +- .../parsePreferenceManagementCsv.ts | 29 +- .../getPreferencesForIdentifiers.test.ts | 21 +- .../getPreferencesForIdentifiers.ts | 152 +++++++--- .../sdk/src/preference-management/types.ts | 10 + 20 files changed, 165 insertions(+), 1086 deletions(-) delete mode 100644 packages/cli/src/lib/preference-management/checkIfPendingPreferenceUpdatesAreNoOp.ts delete mode 100644 packages/cli/src/lib/preference-management/checkIfPendingPreferenceUpdatesCauseConflict.ts delete mode 100644 packages/cli/src/lib/preference-management/codecs.ts delete mode 100644 packages/cli/src/lib/preference-management/getPreferenceMetadataFromRow.ts delete mode 100644 packages/cli/src/lib/preference-management/getPreferenceUpdatesFromRow.ts delete mode 100644 packages/cli/src/lib/preference-management/getPreferencesForIdentifiers.ts diff --git a/packages/cli/src/commands/consent/configure-preference-upload/impl.ts b/packages/cli/src/commands/consent/configure-preference-upload/impl.ts index c4e1f6bf..7012c5f3 100644 --- a/packages/cli/src/commands/consent/configure-preference-upload/impl.ts +++ 
b/packages/cli/src/commands/consent/configure-preference-upload/impl.ts @@ -10,8 +10,8 @@ import * as t from 'io-ts'; import type { LocalContext } from '../../../context.js'; import { doneInputValidation } from '../../../lib/cli/done-input-validation.js'; import { collectCsvFilesOrExit } from '../../../lib/helpers/collectCsvFilesOrExit.js'; +import { FileFormatState } from '@transcend-io/sdk'; import { - FileFormatState, parsePreferenceIdentifiersFromCsv, parsePreferenceFileFormatFromCsv, parsePreferenceAndPurposeValuesFromCsv, diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/receiptsState.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/receiptsState.ts index c7975fbb..7a834c0e 100644 --- a/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/receiptsState.ts +++ b/packages/cli/src/commands/consent/upload-preferences/artifacts/receipts/receiptsState.ts @@ -8,7 +8,7 @@ import { type PendingWithConflictPreferenceUpdates, type PreferenceUpdateMap, type SkippedPreferenceUpdates, -} from '../../../../../lib/preference-management/index.js'; +} from '@transcend-io/sdk'; export type PreferenceReceiptsInterface = { /** Path to file */ diff --git a/packages/cli/src/commands/consent/upload-preferences/schemaState.ts b/packages/cli/src/commands/consent/upload-preferences/schemaState.ts index 2b09ddf1..3c200e85 100644 --- a/packages/cli/src/commands/consent/upload-preferences/schemaState.ts +++ b/packages/cli/src/commands/consent/upload-preferences/schemaState.ts @@ -6,7 +6,7 @@ import { type ColumnIdentifierMap, type ColumnMetadataMap, type ColumnPurposeMap, -} from '../../../lib/preference-management/index.js'; +} from '@transcend-io/sdk'; export interface PreferenceSchemaInterface { /** Name of the column used as timestamp, if any */ diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts 
b/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts index 4f8089b4..7eeffa75 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts @@ -10,7 +10,7 @@ import type { PendingSafePreferenceUpdates, PendingWithConflictPreferenceUpdates, SkippedPreferenceUpdates, -} from '../../../../lib/preference-management/codecs.js'; +} from '@transcend-io/sdk'; import { parsePreferenceManagementCsvWithCache } from '../../../../lib/preference-management/index.js'; import { parseAttributesFromString, readCsv } from '../../../../lib/requests/index.js'; import { logger } from '../../../../logger.js'; diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts b/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts index 5c03269d..9526c3e9 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts @@ -5,19 +5,22 @@ * PreferenceUpdateItem payloads, ready for upload. 
*/ import type { PreferenceUpdateItem } from '@transcend-io/privacy-types'; -import type { PreferenceTopic, Purpose } from '@transcend-io/sdk'; - -import type { FormattedAttribute } from '../../../../../lib/graphql/index.js'; import { - getPreferenceIdentifiersFromRow, getPreferenceUpdatesFromRow, getPreferenceMetadataFromRow, - NONE_PREFERENCE_MAP, type ColumnIdentifierMap, type ColumnMetadataMap, type ColumnPurposeMap, type PendingSafePreferenceUpdates, type PendingWithConflictPreferenceUpdates, + type PreferenceTopic, + type Purpose, +} from '@transcend-io/sdk'; + +import type { FormattedAttribute } from '../../../../../lib/graphql/index.js'; +import { + getPreferenceIdentifiersFromRow, + NONE_PREFERENCE_MAP, } from '../../../../../lib/preference-management/index.js'; export interface BuildPendingParams { diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/types.ts b/packages/cli/src/commands/consent/upload-preferences/upload/types.ts index cb1e1b1f..b32ede66 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/types.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/types.ts @@ -1,8 +1 @@ -export interface PreferenceUploadProgress { - /** how many records just succeeded */ - successDelta: number; - /** cumulative successes in this file */ - successTotal: number; - /** total records that will be uploaded in this file */ - fileTotal: number; -} +export type { PreferenceUploadProgress } from '@transcend-io/sdk'; diff --git a/packages/cli/src/lib/preference-management/checkIfPendingPreferenceUpdatesAreNoOp.ts b/packages/cli/src/lib/preference-management/checkIfPendingPreferenceUpdatesAreNoOp.ts deleted file mode 100644 index ed1550c6..00000000 --- a/packages/cli/src/lib/preference-management/checkIfPendingPreferenceUpdatesAreNoOp.ts +++ /dev/null @@ -1,82 +0,0 @@ -import { - PreferenceQueryResponseItem, - PreferenceStorePurposeResponse, - PreferenceTopicType, -} from '@transcend-io/privacy-types'; -import 
type { PreferenceTopic } from '@transcend-io/sdk'; - -/** - * Check if the pending set of updates are exactly the same as the current consent record. - * - * @param options - Options - * @returns Whether the pending updates already exist in the preference store - */ -export function checkIfPendingPreferenceUpdatesAreNoOp({ - currentConsentRecord, - pendingUpdates, - preferenceTopics, -}: { - /** The current consent record */ - currentConsentRecord: PreferenceQueryResponseItem; - /** The pending updates */ - pendingUpdates: { - [purposeName in string]: Omit; - }; - /** The preference topic configurations */ - preferenceTopics: PreferenceTopic[]; -}): boolean { - // Check each update - return Object.entries(pendingUpdates).every(([purposeName, { preferences = [], enabled }]) => { - // Ensure the purpose exists - const currentPurpose = currentConsentRecord.purposes.find( - (existingPurpose) => existingPurpose.purpose === purposeName, - ); - - // Ensure purpose.enabled is in sync - // Also false if the purpose does not exist - const enabledIsInSync = !!currentPurpose && currentPurpose.enabled === enabled; - if (!enabledIsInSync) { - return false; - } - - // Compare the preferences are in sync - return preferences.every( - ({ topic, choice }) => - // ensure preferences exist on record - currentPurpose.preferences && - currentPurpose.preferences.find((existingPreference) => { - // find matching topic - if (existingPreference.topic !== topic) { - return false; - } - - // Determine type of preference topic - const preferenceTopic = preferenceTopics.find( - (x) => x.slug === topic && x.purpose.trackingType === purposeName, - ); - if (!preferenceTopic) { - throw new Error(`Could not find preference topic for ${topic}`); - } - - // Handle comparison based on type - switch (preferenceTopic.type) { - case PreferenceTopicType.Boolean: - return existingPreference.choice.booleanValue === choice.booleanValue; - case PreferenceTopicType.Select: - return 
existingPreference.choice.selectValue === choice.selectValue; - case PreferenceTopicType.MultiSelect: - // eslint-disable-next-line no-case-declarations - const sortedCurrentValues = (existingPreference.choice.selectValues || []).sort(); - // eslint-disable-next-line no-case-declarations - const sortedNewValues = (choice.selectValues || []).sort(); - return ( - sortedCurrentValues.length === sortedNewValues.length && - sortedCurrentValues.every((x, i) => x === sortedNewValues[i]) - ); - default: - throw new Error(`Unknown preference topic type: ${preferenceTopic.type}`); - } - }), - ); - }); -} diff --git a/packages/cli/src/lib/preference-management/checkIfPendingPreferenceUpdatesCauseConflict.ts b/packages/cli/src/lib/preference-management/checkIfPendingPreferenceUpdatesCauseConflict.ts deleted file mode 100644 index 0865733d..00000000 --- a/packages/cli/src/lib/preference-management/checkIfPendingPreferenceUpdatesCauseConflict.ts +++ /dev/null @@ -1,134 +0,0 @@ -import { - PreferenceQueryResponseItem, - PreferenceStorePurposeResponse, - PreferenceTopicType, -} from '@transcend-io/privacy-types'; -import type { PreferenceTopic } from '@transcend-io/sdk'; - -import { logger } from '../../logger.js'; - -/** - * Check if the pending set of updates will result in a change of - * value to an existing purpose or preference in the preference store. 
- * - * @param options - Options - * @returns True if conflict, false if no conflict and just adding new data for first time - */ -export function checkIfPendingPreferenceUpdatesCauseConflict({ - currentConsentRecord, - pendingUpdates, - preferenceTopics, - log, -}: { - /** The current consent record */ - currentConsentRecord: PreferenceQueryResponseItem; - /** The pending updates */ - pendingUpdates: { - [purposeName in string]: Omit; - }; - /** The preference topic configurations */ - preferenceTopics: PreferenceTopic[]; - /** Whether to log the conflict */ - log?: boolean; -}): boolean { - // Check if any update has conflict - return !!Object.entries(pendingUpdates).find(([purposeName, { preferences = [], enabled }]) => { - // Ensure the purpose exists - const currentPurpose = currentConsentRecord.purposes.find( - (existingPurpose) => existingPurpose.purpose === purposeName, - ); - - // If no purpose exists, then it is not a conflict - if (!currentPurpose) { - if (log) { - logger.warn( - `No existing purpose found for ${purposeName} in consent record for ${currentConsentRecord.userId}.`, - ); - } - return false; - } - - // If purpose.enabled value is off, this is a conflict - if (currentPurpose.enabled !== enabled) { - if (log) { - logger.warn( - `Purpose ${purposeName} enabled value conflict for user ${currentConsentRecord.userId}. 
` + - `Pending Value: ${enabled}, Current Value: ${currentPurpose.enabled}`, - ); - } - return true; - } - - // Check if any preferences are out of sync - return !!preferences.find(({ topic, choice }) => { - // find matching topic - const currentPreference = (currentPurpose.preferences || []).find( - (existingPreference) => existingPreference.topic === topic, - ); - - // if no topic exists, no conflict - if (!currentPreference) { - if (log) { - logger.warn( - `No existing preference found for topic ${topic} in purpose ` + - `${purposeName} for user ${currentConsentRecord.userId}.`, - ); - } - return false; - } - - // Determine type of preference topic - const preferenceTopic = preferenceTopics.find( - (x) => x.slug === topic && x.purpose.trackingType === purposeName, - ); - if (!preferenceTopic) { - throw new Error(`Could not find preference topic for ${topic}`); - } - - // Handle comparison based on type - let boolMatch: boolean; - let selectMatch: boolean; - switch (preferenceTopic.type) { - case PreferenceTopicType.Boolean: - boolMatch = currentPreference.choice.booleanValue !== choice.booleanValue; - if (log) { - logger.warn( - `Preference topic ${topic} boolean value conflict for user ` + - `${currentConsentRecord.userId}. Expected: ${choice.booleanValue}, ` + - `Found: ${currentPreference.choice.booleanValue}`, - ); - } - return boolMatch; - case PreferenceTopicType.Select: - selectMatch = currentPreference.choice.selectValue !== choice.selectValue; - if (log) { - logger.warn( - `Preference topic ${topic} select value conflict for user ` + - `${currentConsentRecord.userId}. 
Expected: ${choice.selectValue}, ` + - `Found: ${currentPreference.choice.selectValue}`, - ); - } - return selectMatch; - case PreferenceTopicType.MultiSelect: - // eslint-disable-next-line no-case-declarations - const sortedCurrentValues = (currentPreference.choice.selectValues || []).sort(); - // eslint-disable-next-line no-case-declarations - const sortedNewValues = (choice.selectValues || []).sort(); - selectMatch = - sortedCurrentValues.length !== sortedNewValues.length || - !sortedCurrentValues.every((x, i) => x === sortedNewValues[i]); - if (log) { - logger.warn( - `Preference topic ${topic} multi-select value conflict for user ` + - `${currentConsentRecord.userId}. Expected: ${sortedNewValues.join( - ', ', - )}, Found: ${sortedCurrentValues.join(', ')}`, - ); - } - return selectMatch; - default: - throw new Error(`Unknown preference topic type: ${preferenceTopic.type}`); - } - }); - }); -} diff --git a/packages/cli/src/lib/preference-management/codecs.ts b/packages/cli/src/lib/preference-management/codecs.ts deleted file mode 100644 index 955457c3..00000000 --- a/packages/cli/src/lib/preference-management/codecs.ts +++ /dev/null @@ -1,283 +0,0 @@ -import { - PreferenceQueryResponseItem, - PreferenceStoreIdentifier, - PreferenceUpdateItem, -} from '@transcend-io/privacy-types'; -import * as t from 'io-ts'; - -export const PurposeRowMapping = t.type({ - /** - * The slug or trackingType of the purpose to map to - * - * e.g. `Marketing` - */ - purpose: t.string, - /** - * If the column maps to a preference instead of a purpose - * this is the slug of the purpose. - * - * null value indicates that this column maps to the true/false - * value of the purpose - */ - preference: t.union([t.string, t.null]), - /** - * The mapping between each row value and purpose/preference value. - * - * e.g. 
for a boolean preference or purpose - * { - * 'true': true, - * 'false': false, - * '': true, - * } - * - * or for a single or multi select preference - * { - * '': true, - * 'value1': 'Value1', - * 'value2': 'Value2', - * } - */ - valueMapping: t.record(t.string, t.union([t.string, t.boolean, t.null, t.undefined])), -}); - -/** Override type */ -export type PurposeRowMapping = t.TypeOf; - -/** - * Mapping of column name to purpose row mapping. - * This is used to map each column in the CSV to the relevant purpose and preference definitions in - * transcend. - */ -export const ColumnPurposeMap = t.record(t.string, PurposeRowMapping); - -/** Override type */ -export type ColumnPurposeMap = t.TypeOf; - -export const IdentifierMetadataForPreference = t.type({ - /** The identifier name */ - name: t.string, - /** Is unique on preference store */ - isUniqueOnPreferenceStore: t.boolean, -}); - -/** Override type */ -export type IdentifierMetadataForPreference = t.TypeOf; - -/** - * Mapping of identifier name to the column name in the CSV file. - * This is used to map each identifier name to the column in the CSV file. - */ -export const ColumnIdentifierMap = t.record(t.string, IdentifierMetadataForPreference); - -/** Override type */ -export type ColumnIdentifierMap = t.TypeOf; - -/** Mapping of a CSV column to a metadata key in the preference store. */ -export const MetadataMapping = t.type({ - /** The metadata key name in the preference store */ - key: t.string, -}); - -/** Override type */ -export type MetadataMapping = t.TypeOf; - -/** Record mapping CSV column names to metadata keys. 
*/ -export const ColumnMetadataMap = t.record(t.string, MetadataMapping); - -/** Override type */ -export type ColumnMetadataMap = t.TypeOf; - -export const FileFormatState = t.intersection([ - t.type({ - /** - * Definition of how to map each column in the CSV to - * the relevant purpose and preference definitions in transcend - */ - columnToPurposeName: ColumnPurposeMap, - /** Last time the file was last parsed at */ - lastFetchedAt: t.string, - /** The column name that maps to the identifier */ - columnToIdentifier: ColumnIdentifierMap, - }), - t.partial({ - /** Determine which column name in file maps to the timestamp */ - timestampColumn: t.string, - /** Mapping of CSV column names to metadata keys */ - columnToMetadata: ColumnMetadataMap, - /** CSV columns that should be ignored during upload */ - columnsToIgnore: t.array(t.string), - }), -]); - -/** Override type */ -export type FileFormatState = t.TypeOf; - -/** - * This is the type of the receipts that are stored in the file - * that is used to track the state of the upload process. - * It is used to resume the upload process from where it left off. - * It is used to persist the state of the upload process across multiple runs. - */ -export const PreferenceUpdateMap = t.record( - t.string, - // This can either be true to indicate the record is pending - // or it can be an object showing the object - // We only return a fixed number of results to avoid - // making the JSON file too large - t.union([t.boolean, PreferenceUpdateItem]), -); - -/** Override type */ -export type PreferenceUpdateMap = t.TypeOf; - -/** - * This is the type of the pending updates that are safe to run without - * conflicts with existing consent preferences. - * - * Key is primaryKey of the record in the file. - * The value is the row in the file that is safe to upload. 
- */ -export const PendingSafePreferenceUpdates = t.record( - t.string, - // This can either be true to indicate the record is safe - // or it can be an object showing the object - // We only return a fixed number of results to avoid - // making the JSON file too large - t.union([t.boolean, t.record(t.string, t.string)]), -); - -/** Override type */ -export type PendingSafePreferenceUpdates = t.TypeOf; - -/** - * These are the updates that failed to be uploaded to the API. - */ -export const FailingPreferenceUpdates = t.record( - t.string, - t.type({ - /** Time upload ran at */ - uploadedAt: t.string, - /** Attempts to upload that resulted in an error */ - error: t.string, - /** The update body */ - update: PreferenceUpdateItem, - }), -); - -/** Override type */ -export type FailingPreferenceUpdates = t.TypeOf; - -/** - * This is the type of the pending updates that are in conflict with existing consent preferences. - * - * Key is primaryKey of the record in the file. - * The value is the row in the file that is pending upload. - */ -export const PendingWithConflictPreferenceUpdates = t.record( - t.string, - // We always return the conflicts for investigation - t.type({ - /** Record to be inserted to transcend v1/preferences API */ - record: PreferenceQueryResponseItem, - /** The row in the file that is pending upload */ - row: t.record(t.string, t.string), - }), -); - -/** Override type */ -export type PendingWithConflictPreferenceUpdates = t.TypeOf< - typeof PendingWithConflictPreferenceUpdates ->; - -/** - * The set of preference updates that are skipped - * Key is primaryKey and value is the row in the CSV - * that is skipped. - * - * This is usually because the preferences are already in the store - * or there are duplicate rows in the CSV file that are identical. 
- */ -export const SkippedPreferenceUpdates = t.record(t.string, t.record(t.string, t.string)); - -/** Override type */ -export type SkippedPreferenceUpdates = t.TypeOf; - -export const RequestUploadReceipts = t.type({ - /** Last time the file was last parsed at */ - lastFetchedAt: t.string, - /** Safe updates (no conflict with existing preferences) keyed by primaryKey */ - pendingSafeUpdates: PendingSafePreferenceUpdates, - /** Conflict updates (existing preferences differ) keyed by primaryKey */ - pendingConflictUpdates: PendingWithConflictPreferenceUpdates, - /** Skipped rows (already in store or duplicates) keyed by primaryKey */ - skippedUpdates: SkippedPreferenceUpdates, - /** Failed uploads keyed by primaryKey */ - failingUpdates: FailingPreferenceUpdates, - /** Pending uploads at time of last cache write; shrinks as processed */ - pendingUpdates: PreferenceUpdateMap, - /** Successfully processed uploads keyed by primaryKey */ - successfulUpdates: PreferenceUpdateMap, -}); - -/** Override type */ -export type RequestUploadReceipts = t.TypeOf; - -export const DeletePreferenceRecordsInput = t.type({ - /** Array of consent preference records to delete */ - records: t.array( - t.type({ - /** The anchor identifier to locate the consent record */ - anchorIdentifier: PreferenceStoreIdentifier, - /** The ISO 8601 timestamp of when the deletion is requested */ - timestamp: t.string, - }), - ), -}); - -/** Override type */ -export type DeletePreferenceRecordsInput = t.TypeOf; - -export const DeletePreferenceRecordsResponse = t.intersection([ - t.type({ - /** Array of results for each preference record deletion */ - records: t.array( - t.intersection([ - t.type({ - /** Whether the deletion was successful */ - success: t.boolean, - }), - t.partial({ - /** An error message if the deletion failed */ - errorMessage: t.string, - }), - ]), - ), - /** The list of failed deletions with their respective errors */ - failures: t.array( - t.type({ - /** The index of the failed 
update in the original request */ - index: t.number, - /** The error message associated with the failure */ - error: t.string, - }), - ), - }), - t.partial({ - /** Any general errors that occurred during the operation */ - errors: t.array(t.string), - }), -]); - -/** Override type */ -export type DeletePreferenceRecordsResponse = t.TypeOf; - -/** CLI CSV Row for deleting preference records */ -export const DeletePreferenceRecordCliCsvRow = t.type({ - /** The name of the identifier type (e.g., email, userId) */ - name: t.string, - /** The value of the identifier */ - value: t.string, -}); - -/** Override type */ -export type DeletePreferenceRecordCliCsvRow = t.TypeOf; diff --git a/packages/cli/src/lib/preference-management/getPreferenceMetadataFromRow.ts b/packages/cli/src/lib/preference-management/getPreferenceMetadataFromRow.ts deleted file mode 100644 index 7141b898..00000000 --- a/packages/cli/src/lib/preference-management/getPreferenceMetadataFromRow.ts +++ /dev/null @@ -1,40 +0,0 @@ -import type { ColumnMetadataMap } from './codecs.js'; - -/** - * Extract metadata values from a CSV row based on the column-to-metadata mapping. 
- * - * @param options - Options for extracting metadata - * @returns Array of metadata key-value pairs for the preference store API - */ -export function getPreferenceMetadataFromRow({ - row, - columnToMetadata, -}: { - /** The CSV row as a record of column name to value */ - row: Record; - /** Mapping from CSV column name to metadata key */ - columnToMetadata: ColumnMetadataMap; -}): Array<{ - /** Metadata key name */ key: string; - /** Metadata value from the CSV row */ - value: string; -}> { - return Object.entries(columnToMetadata) - .map(([columnName, { key }]) => { - const value = row[columnName]; - // Skip if no value in the row or empty string - if (value === undefined || value === '') { - return null; - } - return { key, value }; - }) - .filter( - ( - x, - ): x is { - /** Metadata key name */ key: string; - /** Metadata value from the CSV row */ - value: string; - } => x !== null, - ); -} diff --git a/packages/cli/src/lib/preference-management/getPreferenceUpdatesFromRow.ts b/packages/cli/src/lib/preference-management/getPreferenceUpdatesFromRow.ts deleted file mode 100644 index 51dab6a0..00000000 --- a/packages/cli/src/lib/preference-management/getPreferenceUpdatesFromRow.ts +++ /dev/null @@ -1,257 +0,0 @@ -import { PreferenceStorePurposeResponse, PreferenceTopicType } from '@transcend-io/privacy-types'; -import type { PreferenceTopic } from '@transcend-io/sdk'; -import { apply } from '@transcend-io/type-utils'; - -import { splitCsvToList } from '@transcend-io/utils'; -import { PurposeRowMapping } from './codecs.js'; - -/** - * Parse an arbitrary object to the Transcend PUT /v1/preference update shape - * by using a mapping of column names to purpose/preference slugs. 
- * - * `columnToPurposeName` looks like: - * { - * 'my_purpose': { purpose: 'Marketing', preference: null, valueMapping: { 'true': true, 'false': false } }, - * 'has_topic_1': { purpose: 'Marketing', preference: 'BooleanPreference1', valueMapping: { 'true': true, 'false': false } }, - * 'has_topic_2': { purpose: 'Marketing', preference: 'SingleSelectPreference', valueMapping: { 'Option 1': 'Value1', 'Option 2': 'Value2' } } - * } - * - * `row` looks like: - * { - * 'my_purpose': 'true', - * 'has_topic_1': 'true', - * 'has_topic_2': 'Option 1' - * } - * - * OMISSION RULE: - * - If `valueMapping[row[columnName]]` - * returns `undefined` or `null`, we **omit** that column entirely (do not set purpose enabled, do not push a preference). - * - For MultiSelect, **each token** is treated independently: tokens that map to `undefined|null` are skipped; - * if all tokens are skipped, nothing is pushed. - * - We still validate **types** for mapped values (e.g., boolean must map to boolean, select must map to string, etc.). - * - * NOTE: - * - Final shape must have `enabled` for every purpose touched (enforced by `apply` below). If you omit all top-level purpose mappings, - * but emit preferences, this will throw at the end. This preserves the existing “enabled required” contract. 
- * - * @param options - Options - * @returns The parsed row - */ -export function getPreferenceUpdatesFromRow({ - row, - columnToPurposeName, - purposeSlugs, - preferenceTopics, -}: { - /** Row to parse */ - row: Record; - /** Mapping from column name to parser config */ - columnToPurposeName: Record; - /** The set of allowed purpose slugs */ - purposeSlugs: string[]; - /** The preference topics */ - preferenceTopics: PreferenceTopic[]; -}): { - [k in string]: Omit; -} { - // Create a result object to store the parsed preferences - const result: { - [k in string]: Partial; - } = {}; - - // Iterate over each column and map to the purpose or preference - Object.entries(columnToPurposeName).forEach( - ([columnName, { purpose, preference, valueMapping }]) => { - // Ensure the purpose is valid - if (!purposeSlugs.includes(purpose)) { - throw new Error(`Invalid purpose slug: ${purpose}, expected: ${purposeSlugs.join(', ')}`); - } - - // The raw value from the CSV row for this column - const rawValue = row[columnName]; - - // Check if parsing a preference or just the top level purpose - if (preference) { - const preferenceTopic = preferenceTopics.find( - (x) => x.slug === preference && x.purpose.trackingType === purpose, - ); - if (!preferenceTopic) { - const allowedTopics = preferenceTopics - .filter((x) => x.purpose.trackingType === purpose) - .map((x) => x.slug); - throw new Error( - `Invalid preference slug: ${preference} for purpose: ${purpose}. 
` + - `Allowed preference slugs for purpose are: ${allowedTopics.join(',')}`, - ); - } - - // Ensure destination array - if (!result[purpose]) { - result[purpose] = { - preferences: [], - }; - } - if (!result[purpose].preferences) { - result[purpose].preferences = []; - } - - // handle each type of preference - switch (preferenceTopic.type) { - case PreferenceTopicType.Boolean: { - const mappedValue = valueMapping[rawValue]; - // Throw error on missing mapping - if (mappedValue === undefined && rawValue !== '') { - throw new Error( - `No preference mapping found for value "${rawValue}" in column ` + - `"${columnName}" (purpose=${purpose}, preference=${preference})`, - ); - } - - // Purposefully missing mapping - if (mappedValue === null || mappedValue === undefined) { - return; - } - - // Ensure boolean - if (typeof mappedValue !== 'boolean') { - throw new Error( - `Invalid value for boolean preference: ${preference}, expected boolean, got: ${rawValue}`, - ); - } - result[purpose].preferences!.push({ - topic: preference, - choice: { booleanValue: mappedValue }, - }); - break; - } - - case PreferenceTopicType.Select: { - const mappedValue = valueMapping[rawValue]; - // Throw error on missing mapping - if (mappedValue === undefined && rawValue !== '') { - throw new Error( - `No preference mapping found for value "${rawValue}" in column ` + - `"${columnName}" (purpose=${purpose}, preference=${preference})`, - ); - } - - // Omit if null - if (mappedValue === null || mappedValue === undefined) { - return; - } - - // Ensure string - if (typeof mappedValue !== 'string') { - throw new Error( - `Invalid value for select preference: ${preference}, expected string, got: ${rawValue}`, - ); - } - const trimmed = mappedValue.trim() || null; - - if ( - trimmed && - !preferenceTopic.preferenceOptionValues.map(({ slug }) => slug).includes(trimmed) - ) { - throw new Error( - `Invalid value for select preference: ${preference}, expected one of: ` + - 
`${preferenceTopic.preferenceOptionValues - .map(({ slug }) => slug) - .join(', ')}, got: ${rawValue}`, - ); - } - - result[purpose].preferences!.push({ - topic: preference, - choice: { selectValue: trimmed }, - }); - break; - } - - case PreferenceTopicType.MultiSelect: { - if (typeof rawValue !== 'string') { - throw new Error( - `Invalid value for multi select preference: ${preference}, expected string, got: ${rawValue}`, - ); - } - - // IMPORTANT: Do NOT rely on valueMapping[rawValue] for CSV. - // Split and map per token with the new rule. - const selectValues = splitCsvToList(rawValue) - .map((token) => { - const tokenMapped = valueMapping[token]; - // Throw error on missing mapping - if (tokenMapped === undefined && rawValue !== '') { - throw new Error( - `No preference mapping found for multi select token "${rawValue}" in column ` + - `"${columnName}" (purpose=${purpose}, preference=${preference})`, - ); - } - - // Omit if null - if (tokenMapped === null || tokenMapped === undefined) { - return null; - } - - // Ensure string - if (typeof tokenMapped !== 'string') { - throw new Error( - `Invalid value for multi select preference: ${preference}, ` + - `expected one of: ${preferenceTopic.preferenceOptionValues - .map(({ slug }) => slug) - .join(', ')}, got: ${token}`, - ); - } - return tokenMapped; - }) - .filter((x): x is string => x !== null) - .sort((a, b) => a.localeCompare(b)); - - // Only push if at least one mapped token survived - if (selectValues.length > 0) { - result[purpose].preferences!.push({ - topic: preference, - choice: { selectValues }, - }); - } - break; - } - - default: - throw new Error(`Unknown preference type: ${preferenceTopic.type}`); - } - } else { - // Top-level purpose (no preference) - const mappedValue = valueMapping[rawValue]; - if (mappedValue === undefined && rawValue !== '') { - throw new Error( - `No preference mapping found for value "${rawValue}" in column ` + - `"${columnName}" (purpose=${purpose}, preference=∅) 
${JSON.stringify(row)}`, - ); - } - if (mappedValue === null) { - return; // Omit if null - } - - if (!result[purpose]) { - // Top-level purpose: set enabled strictly from mapped boolean - result[purpose] = { enabled: mappedValue === true }; - } else { - // Preserve preferences; update enabled - result[purpose].enabled = mappedValue === true; - } - } - }, - ); - - // Ensure that enabled is provided for any purpose that appears. - // (This preserves the prior contract and existing tests.) - return apply(result, (x, purposeName) => { - if (typeof x.enabled !== 'boolean') { - throw new Error(`No mapping provided for purpose.enabled=true/false value: ${purposeName}`); - } - return { - ...x, - enabled: x.enabled!, - }; - }); -} diff --git a/packages/cli/src/lib/preference-management/getPreferencesForIdentifiers.ts b/packages/cli/src/lib/preference-management/getPreferencesForIdentifiers.ts deleted file mode 100644 index bd44a357..00000000 --- a/packages/cli/src/lib/preference-management/getPreferencesForIdentifiers.ts +++ /dev/null @@ -1,201 +0,0 @@ -import { PreferenceQueryResponseItem } from '@transcend-io/privacy-types'; -import { decodeCodec } from '@transcend-io/type-utils'; -import { extractErrorMessage, map, splitInHalf } from '@transcend-io/utils'; -import colors from 'colors'; -import type { Got } from 'got'; -import { chunk } from 'lodash-es'; - -import type { PreferenceUploadProgress } from '../../commands/consent/upload-preferences/upload/index.js'; -import { logger } from '../../logger.js'; -import { ConsentPreferenceResponse, withPreferenceRetry } from '@transcend-io/sdk'; - -/** - * Grab the current consent preference values for a list of identifiers - * - * @param sombra - Backend to make API call to - * @param options - Options - * @returns Plaintext context information - */ -export async function getPreferencesForIdentifiers( - sombra: Got, - { - identifiers, - partitionKey, - onProgress, - logInterval = 10000, - skipLogging = false, - concurrency = 
40, - }: { - /** The list of identifiers to look up */ - identifiers: { - /** The value of the identifier */ - value: string; - /** The name of the identifier */ - name: string; - }[]; - /** The partition key to look up */ - partitionKey: string; - /** Whether to skip logging */ - skipLogging?: boolean; - /** The interval to log upload progress */ - logInterval?: number; - /** Concurrency for fetching identifiers */ - concurrency?: number; - /** on progress callback */ - onProgress?: (info: PreferenceUploadProgress) => void; - }, -): Promise { - const results: PreferenceQueryResponseItem[] = []; - const groupedIdentifiers = chunk(identifiers, 100); - - // create a new progress bar instance and use shades_classic theme - const t0 = new Date().getTime(); - - let total = 0; - onProgress?.({ - successDelta: 0, - successTotal: 0, - fileTotal: identifiers.length, // FIXME should be record not identifier count - }); - - /** - * Progress logger respecting `logInterval` - * - * @param delta - delta updated - */ - const maybeLogProgress = (delta: number): void => { - onProgress?.({ - successDelta: delta, - successTotal: total, - fileTotal: identifiers.length, - }); - - if (skipLogging) return; - const shouldLog = - total % logInterval === 0 || - Math.floor((total - identifiers.length) / logInterval) < Math.floor(total / logInterval); - if (shouldLog) { - logger.info( - colors.green( - `Fetched ${total}/${identifiers.length} user preferences from partition ${partitionKey}`, - ), - ); - } - }; - - /** - * Attempt a single POST for a given group with transient retries. - * Returns decoded nodes on success. - * Throws an error on terminal failure. - * If the error contains "did not pass validation", it throws that error up - * so the caller can choose to split. 
- * - * @param group - The group of identifiers to fetch - * @returns The decoded nodes from the response - */ - const postGroupWithRetries = async ( - group: { - /** Value of the identifier */ - value: string; - /** Name of the identifier */ - name: string; - }[], - ): Promise => { - const rawResult = await withPreferenceRetry( - 'Preference Query', - () => - sombra - .post(`v1/preferences/${partitionKey}/query`, { - json: { - filter: { identifiers: group }, - }, - }) - .json(), - { - logger, - onRetry: (attempt: number, _err: unknown, msg: string) => { - logger.warn( - colors.yellow( - `[RETRY v1/preferences/${partitionKey}/query] ` + - `group size=${group.length} partition=${partitionKey} attempt=${attempt}: ${msg}`, - ), - ); - }, - }, - ); - - const result = decodeCodec(ConsentPreferenceResponse, rawResult); - return result.nodes; - }; - - /** - * Recursively process a group: - * - Try to fetch in one go. - * - If it fails with "did not pass validation", split into halves and recurse. - * - If the group becomes a singleton and still fails validation, skip it. - * In all terminal paths (success or skip), increment `total` by the - * number of identifiers accounted for and log progress. 
- * - * @param group - The group of identifiers to process - */ - const processGroup = async ( - group: { - /** Value of the identifier */ - value: string; - /** Name of the identifier */ - name: string; - }[], - ): Promise => { - try { - const nodes = await postGroupWithRetries(group); - results.push(...nodes); - total += group.length; - maybeLogProgress(group.length); - } catch (err) { - const msg = extractErrorMessage(err); - - if (/did not pass validation/i.test(msg)) { - // If single, skip and count it - if (group.length === 1) { - const only = group[0]; - logger.warn(colors.yellow(`Skipping identifier "${only.value}" (${only.name}): ${msg}`)); - total += 1; - maybeLogProgress(1); - return; - } - - // Otherwise, split and recurse - const [left, right] = splitInHalf(group); - logger.warn( - colors.yellow( - `Group of ${group.length} did not pass validation. Splitting into ${left.length} and ${right.length}.`, - ), - ); - await processGroup(left); - await processGroup(right); - return; - } - - // Non-validation terminal error: rethrow - throw err; - } - }; - - await map( - groupedIdentifiers, - async (group) => { - await processGroup(group); - }, - { concurrency }, - ); - - const t1 = new Date().getTime(); - const totalTime = t1 - t0; - - if (!skipLogging) { - // Log completion time - logger.info(colors.green(`Completed download in "${totalTime / 1000}" seconds.`)); - } - - return results; -} diff --git a/packages/cli/src/lib/preference-management/index.ts b/packages/cli/src/lib/preference-management/index.ts index 3d44c303..d65a4028 100644 --- a/packages/cli/src/lib/preference-management/index.ts +++ b/packages/cli/src/lib/preference-management/index.ts @@ -1,9 +1,3 @@ -export * from './codecs.js'; -export * from './getPreferencesForIdentifiers.js'; -export * from './getPreferenceUpdatesFromRow.js'; -export * from './getPreferenceMetadataFromRow.js'; -export * from './checkIfPendingPreferenceUpdatesAreNoOp.js'; -export * from 
'./checkIfPendingPreferenceUpdatesCauseConflict.js'; export * from './parsePreferenceManagementCsv.js'; export * from './parsePreferenceIdentifiersFromCsv.js'; export * from './parsePreferenceFileFormatFromCsv.js'; diff --git a/packages/cli/src/lib/preference-management/parsePreferenceAndPurposeValuesFromCsv.ts b/packages/cli/src/lib/preference-management/parsePreferenceAndPurposeValuesFromCsv.ts index eb2283ff..94c76973 100644 --- a/packages/cli/src/lib/preference-management/parsePreferenceAndPurposeValuesFromCsv.ts +++ b/packages/cli/src/lib/preference-management/parsePreferenceAndPurposeValuesFromCsv.ts @@ -8,7 +8,7 @@ import { uniq, difference } from 'lodash-es'; import { logger } from '../../logger.js'; import { splitCsvToList } from '@transcend-io/utils'; -import { FileFormatState } from './codecs.js'; +import { FileFormatState } from '@transcend-io/sdk'; /** Values that clearly mean "no preference recorded" and should map to null. */ const NULL_VALUES = new Set(['', 'undefined', 'null', 'none', 'n/a', 'na']); diff --git a/packages/cli/src/lib/preference-management/parsePreferenceFileFormatFromCsv.ts b/packages/cli/src/lib/preference-management/parsePreferenceFileFormatFromCsv.ts index ea439596..14b4aa1a 100644 --- a/packages/cli/src/lib/preference-management/parsePreferenceFileFormatFromCsv.ts +++ b/packages/cli/src/lib/preference-management/parsePreferenceFileFormatFromCsv.ts @@ -4,7 +4,7 @@ import inquirer from 'inquirer'; import { uniq, difference } from 'lodash-es'; import { logger } from '../../logger.js'; -import { FileFormatState } from './codecs.js'; +import { FileFormatState } from '@transcend-io/sdk'; export const NONE_PREFERENCE_MAP = '[NONE]'; diff --git a/packages/cli/src/lib/preference-management/parsePreferenceIdentifiersFromCsv.ts b/packages/cli/src/lib/preference-management/parsePreferenceIdentifiersFromCsv.ts index a730f4d1..46e7c3ea 100644 --- a/packages/cli/src/lib/preference-management/parsePreferenceIdentifiersFromCsv.ts +++ 
b/packages/cli/src/lib/preference-management/parsePreferenceIdentifiersFromCsv.ts @@ -9,7 +9,7 @@ import { uniq, keyBy } from 'lodash-es'; import { logger } from '../../logger.js'; import { inquirerConfirmBoolean } from '../helpers/index.js'; -import type { FileFormatState, IdentifierMetadataForPreference } from './codecs.js'; +import type { FileFormatState, IdentifierMetadataForPreference } from '@transcend-io/sdk'; const { mapSeries } = Bluebird; diff --git a/packages/cli/src/lib/preference-management/parsePreferenceManagementCsv.ts b/packages/cli/src/lib/preference-management/parsePreferenceManagementCsv.ts index 0b26ce9c..0e6ee066 100644 --- a/packages/cli/src/lib/preference-management/parsePreferenceManagementCsv.ts +++ b/packages/cli/src/lib/preference-management/parsePreferenceManagementCsv.ts @@ -1,24 +1,25 @@ import { PersistedState } from '@transcend-io/persisted-state'; import type { PreferenceQueryResponseItem } from '@transcend-io/privacy-types'; -import type { Identifier, PreferenceTopic } from '@transcend-io/sdk'; -import type { ObjByString } from '@transcend-io/type-utils'; -import colors from 'colors'; -import type { Got } from 'got'; -import { keyBy } from 'lodash-es'; - -import type { PreferenceUploadProgress } from '../../commands/consent/upload-preferences/upload/index.js'; -import { logger } from '../../logger.js'; -import { checkIfPendingPreferenceUpdatesAreNoOp } from './checkIfPendingPreferenceUpdatesAreNoOp.js'; -import { checkIfPendingPreferenceUpdatesCauseConflict } from './checkIfPendingPreferenceUpdatesCauseConflict.js'; import { + checkIfPendingPreferenceUpdatesAreNoOp, + checkIfPendingPreferenceUpdatesCauseConflict, + getPreferencesForIdentifiers, + getPreferenceUpdatesFromRow, type FileFormatState, + type Identifier, type PendingSafePreferenceUpdates, type PendingWithConflictPreferenceUpdates, + type PreferenceTopic, + type PreferenceUploadProgress, type RequestUploadReceipts, type SkippedPreferenceUpdates, -} from './codecs.js'; 
-import { getPreferencesForIdentifiers } from './getPreferencesForIdentifiers.js'; -import { getPreferenceUpdatesFromRow } from './getPreferenceUpdatesFromRow.js'; +} from '@transcend-io/sdk'; +import type { ObjByString } from '@transcend-io/type-utils'; +import colors from 'colors'; +import type { Got } from 'got'; +import { keyBy } from 'lodash-es'; + +import { logger } from '../../logger.js'; import { parsePreferenceAndPurposeValuesFromCsv } from './parsePreferenceAndPurposeValuesFromCsv.js'; import { parsePreferenceFileFormatFromCsv } from './parsePreferenceFileFormatFromCsv.js'; import { @@ -138,6 +139,7 @@ export async function parsePreferenceManagementCsvWithCache( logInterval: identifierDownloadLogInterval, partitionKey, concurrency: downloadIdentifierConcurrency, + logger, onProgress, }); @@ -250,7 +252,6 @@ export async function parsePreferenceManagementCsvWithCache( currentConsentRecord, pendingUpdates, preferenceTopics, - log: false, // update this to log for debugging purposes }) ) { pendingConflictUpdates[primaryKey] = { diff --git a/packages/cli/src/lib/preference-management/tests/getPreferencesForIdentifiers.test.ts b/packages/cli/src/lib/preference-management/tests/getPreferencesForIdentifiers.test.ts index 85dd7744..0d7b209a 100644 --- a/packages/cli/src/lib/preference-management/tests/getPreferencesForIdentifiers.test.ts +++ b/packages/cli/src/lib/preference-management/tests/getPreferencesForIdentifiers.test.ts @@ -3,7 +3,7 @@ import type { Got } from 'got'; /* eslint-disable @typescript-eslint/no-explicit-any,@typescript-eslint/no-unused-vars,require-await */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import { getPreferencesForIdentifiers } from '../getPreferencesForIdentifiers.js'; +import { getPreferencesForIdentifiers } from '@transcend-io/sdk'; // Hoisted shared spies / fakes const H = vi.hoisted(() => ({ @@ -72,15 +72,6 @@ vi.mock('@transcend-io/type-utils', async (importOriginal) => { }; }); -// 
withPreferenceRetry should invoke the provided fn and return its result, -// but we still want to see that it's being called. -const withRetrySpy = vi.fn(async (name: string, fn: () => Promise, _opts?: any) => fn()); - -vi.mock('../withPreferenceRetry.js', () => ({ - withPreferenceRetry: (name: string, fn: unknown, opts?: unknown) => - // @ts-expect-error test-only - withRetrySpy(name, fn, opts), -})); describe('getPreferencesForIdentifiers', () => { beforeEach(() => { @@ -147,8 +138,9 @@ describe('getPreferencesForIdentifiers', () => { const out = await getPreferencesForIdentifiers(sombra, { identifiers, partitionKey: 'p0', - skipLogging: true, // avoid logger.info + progress start + skipLogging: true, concurrency: 7, + logger: H.loggerSpies as any, }); // Expect 3 calls (100 + 100 + 50) @@ -187,8 +179,8 @@ describe('getPreferencesForIdentifiers', () => { // @ts-expect-error test-only capture expect(H.mapOpts.current?.concurrency).toBe(7); - // Ensure wrapper was used for each group - expect(withRetrySpy).toHaveBeenCalledTimes(3); + // withPreferenceRetry is used internally by the SDK — verified via sombra.post calls + expect(sombra.post).toHaveBeenCalledTimes(3); }, ); @@ -240,8 +232,9 @@ describe('getPreferencesForIdentifiers', () => { const out = await getPreferencesForIdentifiers(sombra, { identifiers, partitionKey: 'pA', - skipLogging: false, // enable start+info logs + skipLogging: false, concurrency: 2, + logger: H.loggerSpies as any, }); expect(out).toHaveLength(5); diff --git a/packages/sdk/src/preference-management/getPreferencesForIdentifiers.ts b/packages/sdk/src/preference-management/getPreferencesForIdentifiers.ts index bad9201d..0bb4adcc 100644 --- a/packages/sdk/src/preference-management/getPreferencesForIdentifiers.ts +++ b/packages/sdk/src/preference-management/getPreferencesForIdentifiers.ts @@ -1,14 +1,23 @@ import { PreferenceQueryResponseItem } from '@transcend-io/privacy-types'; import { decodeCodec } from '@transcend-io/type-utils'; -import 
{ map, type Logger } from '@transcend-io/utils'; +import { + extractErrorMessage, + map, + splitInHalf, + type Logger, +} from '@transcend-io/utils'; import type { Got } from 'got'; import { chunk } from 'lodash-es'; -import { ConsentPreferenceResponse } from './types.js'; +import { ConsentPreferenceResponse, type PreferenceUploadProgress } from './types.js'; import { withPreferenceRetry } from './withPreferenceRetry.js'; /** - * Grab the current consent preference values for a list of identifiers + * Grab the current consent preference values for a list of identifiers. + * + * Uses recursive split-on-validation: if a group fails with + * "did not pass validation", it is halved and retried. Singletons + * that still fail are skipped. * * @param sombra - Backend to make API call to * @param options - Options @@ -19,26 +28,31 @@ export async function getPreferencesForIdentifiers( { identifiers, partitionKey, + onProgress, + logInterval = 10000, skipLogging = false, concurrency = 40, logger, - onProgress, }: { /** The list of identifiers to look up */ identifiers: { /** The value of the identifier */ value: string; + /** The name of the identifier */ + name: string; }[]; /** The partition key to look up */ partitionKey: string; /** Whether to skip logging */ skipLogging?: boolean; - /** Concurrency for requests (default 40) */ + /** The interval to log upload progress */ + logInterval?: number; + /** Concurrency for fetching identifiers */ concurrency?: number; /** Logger */ logger: Logger; - /** Optional progress callback (completed count, total identifiers) */ - onProgress?: (completed: number, total: number) => void; + /** Progress callback */ + onProgress?: (info: PreferenceUploadProgress) => void; }, ): Promise { const results: PreferenceQueryResponseItem[] = []; @@ -47,38 +61,106 @@ export async function getPreferencesForIdentifiers( const t0 = new Date().getTime(); let total = 0; - await map( - groupedIdentifiers, - async (group) => { - const rawResult = await 
withPreferenceRetry( - 'Preference Query', - () => - sombra - .post(`v1/preferences/${partitionKey}/query`, { - json: { - filter: { identifiers: group }, - limit: group.length, - }, - }) - .json(), - { - logger, - onRetry: (attempt, _err, msg) => { - logger.warn( - `[RETRY] group size=${group.length} partition=${partitionKey} attempt=${attempt}: ${msg}`, - ); - }, - }, + onProgress?.({ + successDelta: 0, + successTotal: 0, + fileTotal: identifiers.length, + }); + + const maybeLogProgress = (delta: number): void => { + onProgress?.({ + successDelta: delta, + successTotal: total, + fileTotal: identifiers.length, + }); + + if (skipLogging) return; + const shouldLog = + total % logInterval === 0 || + Math.floor((total - identifiers.length) / logInterval) < + Math.floor(total / logInterval); + if (shouldLog) { + logger.info( + `Fetched ${total}/${identifiers.length} user preferences from partition ${partitionKey}`, ); + } + }; + + const postGroupWithRetries = async ( + group: { value: string; name: string }[], + ): Promise => { + const rawResult = await withPreferenceRetry( + 'Preference Query', + () => + sombra + .post(`v1/preferences/${partitionKey}/query`, { + json: { + filter: { identifiers: group }, + }, + }) + .json(), + { + logger, + onRetry: (attempt, _err, msg) => { + logger.warn( + `[RETRY v1/preferences/${partitionKey}/query] ` + + `group size=${group.length} partition=${partitionKey} attempt=${attempt}: ${msg}`, + ); + }, + }, + ); + + const result = decodeCodec(ConsentPreferenceResponse, rawResult); + return result.nodes; + }; - const result = decodeCodec(ConsentPreferenceResponse, rawResult); - results.push(...result.nodes); + /** + * Recursively process a group: + * - Try to fetch in one go. + * - If it fails with "did not pass validation", split into halves and recurse. + * - If the group is a singleton and still fails validation, skip it. 
+ */ + const processGroup = async ( + group: { value: string; name: string }[], + ): Promise => { + try { + const nodes = await postGroupWithRetries(group); + results.push(...nodes); total += group.length; - onProgress?.(total, identifiers.length); - }, - { - concurrency, + maybeLogProgress(group.length); + } catch (err) { + const msg = extractErrorMessage(err); + + if (/did not pass validation/i.test(msg)) { + if (group.length === 1) { + const only = group[0]!; + logger.warn( + `Skipping identifier "${only.value}" (${only.name}): ${msg}`, + ); + total += 1; + maybeLogProgress(1); + return; + } + + const [left, right] = splitInHalf(group); + logger.warn( + `Group of ${group.length} did not pass validation. Splitting into ${left.length} and ${right.length}.`, + ); + await processGroup(left); + await processGroup(right); + return; + } + + throw err; + } + }; + + await map( + groupedIdentifiers, + async (group) => { + await processGroup(group); }, + { concurrency }, ); const t1 = new Date().getTime(); diff --git a/packages/sdk/src/preference-management/types.ts b/packages/sdk/src/preference-management/types.ts index af6e718d..62356271 100644 --- a/packages/sdk/src/preference-management/types.ts +++ b/packages/sdk/src/preference-management/types.ts @@ -46,3 +46,13 @@ export type PreferencesQueryFilter = { /** Which dimension we chunk on */ export type ChunkMode = 'timestamp' | 'updated'; + +/** Progress info emitted during preference upload/fetch operations */ +export interface PreferenceUploadProgress { + /** how many records just succeeded */ + successDelta: number; + /** cumulative successes in this file */ + successTotal: number; + /** total records that will be uploaded in this file */ + fileTotal: number; +} From b3ec8f84b932ccc9cbb6454d6504d9ec087fdde9 Mon Sep 17 00:00:00 2001 From: Michael Farrell Date: Sat, 28 Mar 2026 22:33:18 -0700 Subject: [PATCH 06/10] Phase 3: Move upload pipeline + pooling to SDK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit UploadProgressSink interface: - New packages/sdk/src/preference-upload/progress.ts with structured callbacks (onFileStart, onFileProgress, onFileComplete, onError, onJobComplete) for CLI receipts/dashboard, container progress.json, or agent events - noopProgressSink for when reporting isn't needed Upload core moved to SDK (packages/sdk/src/preference-upload/): - batchUploader.ts — Logger DI, stripped colors - loadReferenceData.ts — Logger DI, calls SDK fetchAll* - buildPendingUpdates.ts — pure transforms using SDK codecs - getPreferenceIdentifiersFromRow + NONE_PREFERENCE_MAP moved to SDK preference-management (pure functions, no CLI deps) Pooling moved to SDK (packages/sdk/src/pooling/): - runPool.ts — stripped colors, installInteractiveSwitcher now injectable callback (CLI passes it, container skips it) - spawnWorkerProcess.ts — openLogWindows replaced with onLogFilesCreated callback - types.ts, logRotation.ts, ensureLogFile.ts, safeGetLogPathsForSlot.ts CLI rewired to import from @transcend-io/sdk for moved modules. 
SDK typecheck ✓ | SDK build ✓ | CLI typecheck ✓ | 154 tests pass ✓ No colors in SDK ✓ Made-with: Cursor --- .../configure-preference-upload/impl.ts | 10 +- .../upload/buildInteractiveUploadPlan.ts | 20 +- .../upload-preferences/upload/index.ts | 9 +- .../interactivePreferenceUploaderFromPlan.ts | 4 +- .../upload/tests/batchUploader.test.ts | 9 +- .../upload/transform/buildPendingUpdates.ts | 8 +- .../upload/transform/index.ts | 7 +- .../upload/transform/transformCsv.ts | 2 + packages/sdk/src/index.ts | 2 + packages/sdk/src/pooling/ensureLogFile.ts | 13 + packages/sdk/src/pooling/index.ts | 6 + packages/sdk/src/pooling/logRotation.ts | 259 +++++++ packages/sdk/src/pooling/runPool.ts | 662 ++++++++++++++++++ .../sdk/src/pooling/safeGetLogPathsForSlot.ts | 28 + .../sdk/src/pooling/spawnWorkerProcess.ts | 215 ++++++ packages/sdk/src/pooling/types.ts | 63 ++ .../getPreferenceIdentifiersFromRow.ts | 34 + .../sdk/src/preference-management/index.ts | 1 + .../src/preference-upload/batchUploader.ts | 116 +++ .../preference-upload/buildPendingUpdates.ts | 140 ++++ packages/sdk/src/preference-upload/index.ts | 4 + .../preference-upload/loadReferenceData.ts | 37 + .../sdk/src/preference-upload/progress.ts | 56 ++ 23 files changed, 1679 insertions(+), 26 deletions(-) create mode 100644 packages/sdk/src/pooling/ensureLogFile.ts create mode 100644 packages/sdk/src/pooling/index.ts create mode 100644 packages/sdk/src/pooling/logRotation.ts create mode 100644 packages/sdk/src/pooling/runPool.ts create mode 100644 packages/sdk/src/pooling/safeGetLogPathsForSlot.ts create mode 100644 packages/sdk/src/pooling/spawnWorkerProcess.ts create mode 100644 packages/sdk/src/pooling/types.ts create mode 100644 packages/sdk/src/preference-management/getPreferenceIdentifiersFromRow.ts create mode 100644 packages/sdk/src/preference-upload/batchUploader.ts create mode 100644 packages/sdk/src/preference-upload/buildPendingUpdates.ts create mode 100644 packages/sdk/src/preference-upload/index.ts 
create mode 100644 packages/sdk/src/preference-upload/loadReferenceData.ts create mode 100644 packages/sdk/src/preference-upload/progress.ts diff --git a/packages/cli/src/commands/consent/configure-preference-upload/impl.ts b/packages/cli/src/commands/consent/configure-preference-upload/impl.ts index 7012c5f3..9900064d 100644 --- a/packages/cli/src/commands/consent/configure-preference-upload/impl.ts +++ b/packages/cli/src/commands/consent/configure-preference-upload/impl.ts @@ -1,7 +1,11 @@ import { createReadStream } from 'node:fs'; import { PersistedState } from '@transcend-io/persisted-state'; -import { buildTranscendGraphQLClient } from '@transcend-io/sdk'; +import { + buildTranscendGraphQLClient, + FileFormatState, + loadReferenceData, +} from '@transcend-io/sdk'; import colors from 'colors'; import { parse as csvParse } from 'csv-parse'; import inquirer from 'inquirer'; @@ -10,7 +14,6 @@ import * as t from 'io-ts'; import type { LocalContext } from '../../../context.js'; import { doneInputValidation } from '../../../lib/cli/done-input-validation.js'; import { collectCsvFilesOrExit } from '../../../lib/helpers/collectCsvFilesOrExit.js'; -import { FileFormatState } from '@transcend-io/sdk'; import { parsePreferenceIdentifiersFromCsv, parsePreferenceFileFormatFromCsv, @@ -19,7 +22,6 @@ import { import { readCsv } from '../../../lib/requests/index.js'; import { logger } from '../../../logger.js'; import { computeSchemaFile } from '../upload-preferences/artifacts/index.js'; -import { loadReferenceData } from '../upload-preferences/upload/loadReferenceData.js'; export interface ConfigurePreferenceUploadFlags { auth: string; @@ -160,7 +162,7 @@ export async function configurePreferenceUpload( // 2) Fetch org reference data const client = buildTranscendGraphQLClient(transcendUrl, auth); - const { purposes, preferenceTopics, identifiers } = await loadReferenceData(client); + const { purposes, preferenceTopics, identifiers } = await loadReferenceData(client, { logger 
}); const allIdentifierNames = identifiers.map((id) => id.name); logger.info( diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts b/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts index 7eeffa75..f09fe5e1 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts @@ -1,3 +1,11 @@ +import { + loadReferenceData, + type FileFormatState, + type PendingSafePreferenceUpdates, + type PendingWithConflictPreferenceUpdates, + type PreferenceUploadReferenceData, + type SkippedPreferenceUpdates, +} from '@transcend-io/sdk'; import { limitRecords } from '@transcend-io/utils'; import colors from 'colors'; import type { Got } from 'got'; @@ -5,19 +13,11 @@ import type { GraphQLClient } from 'graphql-request'; import * as t from 'io-ts'; import type { FormattedAttribute } from '../../../../lib/graphql/formatAttributeValues.js'; -import type { - FileFormatState, - PendingSafePreferenceUpdates, - PendingWithConflictPreferenceUpdates, - SkippedPreferenceUpdates, -} from '@transcend-io/sdk'; import { parsePreferenceManagementCsvWithCache } from '../../../../lib/preference-management/index.js'; import { parseAttributesFromString, readCsv } from '../../../../lib/requests/index.js'; import { logger } from '../../../../logger.js'; import { type PreferenceReceiptsInterface } from '../artifacts/receipts/receiptsState.js'; import { type PreferenceSchemaInterface } from '../schemaState.js'; -import { loadReferenceData, type PreferenceUploadReferenceData } from './loadReferenceData.js'; -import { transformCsv } from './transform/index.js'; import type { PreferenceUploadProgress } from './types.js'; export interface InteractiveUploadPreferencePlan { @@ -127,11 +127,11 @@ export async function buildInteractiveUploadPreferencePlan({ ); // Build clients + reference 
data (purposes/topics/identifiers) - const references = await loadReferenceData(client); + const references = await loadReferenceData(client, { logger }); // Read in the file logger.info(colors.magenta(`Reading in file: "${file}"`)); - const preferences = transformCsv(readCsv(file, t.record(t.string, t.string))); + const preferences = readCsv(file, t.record(t.string, t.string)); logger.info(colors.magenta(`Read in ${preferences.length} rows`)); // Parse & validate CSV → derive safe/conflict/skipped sets (no uploading) diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/index.ts b/packages/cli/src/commands/consent/upload-preferences/upload/index.ts index ec3e310f..8713710a 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/index.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/index.ts @@ -1,6 +1,11 @@ export * from './types.js'; -export * from './loadReferenceData.js'; +export { + loadReferenceData, + type PreferenceUploadReferenceData, + uploadChunkWithSplit, + type BatchUploaderDeps, + type BatchUploadPreferenceOptions, +} from '@transcend-io/sdk'; export * from './buildInteractiveUploadPlan.js'; -export * from './batchUploader.js'; export * from './transform/index.js'; export * from './interactivePreferenceUploaderFromPlan.js'; diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts b/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts index baddf974..3a4e3507 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts @@ -9,9 +9,8 @@ import { chunk, groupBy } from 'lodash-es'; import { RETRYABLE_BATCH_STATUSES } from '../../../../constants.js'; import { logger } from '../../../../logger.js'; import type { PreferenceReceiptsInterface } 
from '../artifacts/receipts/index.js'; -import { uploadChunkWithSplit } from './batchUploader.js'; +import { uploadChunkWithSplit, buildPendingUpdates } from '@transcend-io/sdk'; import type { InteractiveUploadPreferencePlan } from './buildInteractiveUploadPlan.js'; -import { buildPendingUpdates } from './transform/index.js'; import type { PreferenceUploadProgress } from './types.js'; const { map: pMap } = Bluebird; @@ -307,6 +306,7 @@ export async function interactivePreferenceUploaderFromPlan( isRetryableStatus: (s) => // eslint-disable-next-line @typescript-eslint/no-explicit-any !!s && RETRYABLE_BATCH_STATUSES.has(s as any), + logger, }, { onSuccess: markSuccessFor, diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/tests/batchUploader.test.ts b/packages/cli/src/commands/consent/upload-preferences/upload/tests/batchUploader.test.ts index 441c797b..08c7a1e5 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/tests/batchUploader.test.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/tests/batchUploader.test.ts @@ -9,7 +9,7 @@ import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest'; import { logger } from '../../../../../logger.js'; // --- Import SUT & mocked symbols --- -import { uploadChunkWithSplit, type BatchUploaderDeps } from '../batchUploader.js'; +import { uploadChunkWithSplit, type BatchUploaderDeps } from '@transcend-io/sdk'; // --- Mocks (declare BEFORE importing the SUT) --- vi.mock('@transcend-io/utils', () => ({ @@ -89,6 +89,7 @@ describe('uploadChunkWithSplit', () => { retryPolicy: { maxAttempts: 3, delayMs: 10, shouldRetry: () => false }, options: { skipWorkflowTriggers: false }, isRetryableStatus: vi.fn(() => false), + logger, }; const onSuccess = vi.fn().mockResolvedValue(undefined); @@ -135,6 +136,7 @@ describe('uploadChunkWithSplit', () => { }, options: { skipWorkflowTriggers: false }, isRetryableStatus: vi.fn((s?: number) => s === 503), + logger, }; const onSuccess 
= vi.fn().mockResolvedValue(undefined); @@ -176,6 +178,7 @@ describe('uploadChunkWithSplit', () => { retryPolicy: { maxAttempts: 2, delayMs: 1, shouldRetry: () => true }, options: { skipWorkflowTriggers: false }, isRetryableStatus: vi.fn((s?: number) => s === 429), + logger, }; const onFailureBatch = vi.fn().mockResolvedValue(undefined); @@ -225,6 +228,7 @@ describe('uploadChunkWithSplit', () => { retryPolicy: { maxAttempts: 1, delayMs: 1, shouldRetry: () => false }, options: { skipWorkflowTriggers: false }, isRetryableStatus: vi.fn(() => false), + logger, }; const onSuccess = vi.fn().mockResolvedValue(undefined); @@ -265,6 +269,7 @@ describe('uploadChunkWithSplit', () => { retryPolicy: { maxAttempts: 1, delayMs: 1, shouldRetry: () => false }, options: { skipWorkflowTriggers: false }, isRetryableStatus: vi.fn(() => false), + logger, }; const onFailureSingle = vi.fn().mockResolvedValue(undefined); @@ -297,6 +302,7 @@ describe('uploadChunkWithSplit', () => { retryPolicy: { maxAttempts: 2, delayMs: 1, shouldRetry: () => true }, options: { skipWorkflowTriggers: false }, isRetryableStatus: vi.fn(() => false), // not retryable by status, but soft-rate-limit triggers retry anyway + logger, }; const onSuccess = vi.fn().mockResolvedValue(undefined); @@ -333,6 +339,7 @@ describe('uploadChunkWithSplit', () => { retryPolicy: { maxAttempts: 2, delayMs: 1, shouldRetry: () => true }, options: { skipWorkflowTriggers: false }, isRetryableStatus: vi.fn(() => false), + logger, }; const onSuccess = vi.fn().mockResolvedValue(undefined); diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts b/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts index 9526c3e9..94fc380a 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts @@ -6,8 +6,10 @@ */ import 
type { PreferenceUpdateItem } from '@transcend-io/privacy-types'; import { - getPreferenceUpdatesFromRow, + getPreferenceIdentifiersFromRow, getPreferenceMetadataFromRow, + getPreferenceUpdatesFromRow, + NONE_PREFERENCE_MAP, type ColumnIdentifierMap, type ColumnMetadataMap, type ColumnPurposeMap, @@ -18,10 +20,6 @@ import { } from '@transcend-io/sdk'; import type { FormattedAttribute } from '../../../../../lib/graphql/index.js'; -import { - getPreferenceIdentifiersFromRow, - NONE_PREFERENCE_MAP, -} from '../../../../../lib/preference-management/index.js'; export interface BuildPendingParams { /** Safe updates keyed by user/primaryKey */ diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/transform/index.ts b/packages/cli/src/commands/consent/upload-preferences/upload/transform/index.ts index 5f1f0911..a7e17db4 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/transform/index.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/transform/index.ts @@ -1,2 +1,5 @@ -export * from './buildPendingUpdates.js'; -export * from './transformCsv.js'; +export { + buildPendingUpdates, + type BuildPendingParams, + type FormattedAttribute, +} from '@transcend-io/sdk'; diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/transform/transformCsv.ts b/packages/cli/src/commands/consent/upload-preferences/upload/transform/transformCsv.ts index aaa25cfa..910966e1 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/transform/transformCsv.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/transform/transformCsv.ts @@ -6,6 +6,8 @@ import { logger } from '../../../../../logger.js'; /** * Add Transcend ID to preferences if email_id is present * + * FIXME remove + * * @param preferences - List of preferences * @returns The updated preferences with Transcend ID added */ diff --git a/packages/sdk/src/index.ts b/packages/sdk/src/index.ts index 7c5c1431..fd222430 100644 --- 
a/packages/sdk/src/index.ts +++ b/packages/sdk/src/index.ts @@ -22,3 +22,5 @@ export function createMonorepoPackageDefinition( export * from './api/index.js'; export * from './data-inventory/index.js'; export * from './preference-management/index.js'; +export * from './preference-upload/index.js'; +export * from './pooling/index.js'; diff --git a/packages/sdk/src/pooling/ensureLogFile.ts b/packages/sdk/src/pooling/ensureLogFile.ts new file mode 100644 index 00000000..e00a9167 --- /dev/null +++ b/packages/sdk/src/pooling/ensureLogFile.ts @@ -0,0 +1,13 @@ +import { closeSync, existsSync, openSync } from 'node:fs'; + +/** + * Ensure a log file exists (touch). + * + * @param pathStr - the path to the log file + */ +export function ensureLogFile(pathStr: string): void { + if (!existsSync(pathStr)) { + const fd = openSync(pathStr, 'a'); + closeSync(fd); + } +} diff --git a/packages/sdk/src/pooling/index.ts b/packages/sdk/src/pooling/index.ts new file mode 100644 index 00000000..1f778a2a --- /dev/null +++ b/packages/sdk/src/pooling/index.ts @@ -0,0 +1,6 @@ +export * from './types.js'; +export * from './runPool.js'; +export * from './spawnWorkerProcess.js'; +export * from './logRotation.js'; +export * from './ensureLogFile.js'; +export * from './safeGetLogPathsForSlot.js'; diff --git a/packages/sdk/src/pooling/logRotation.ts b/packages/sdk/src/pooling/logRotation.ts new file mode 100644 index 00000000..f5a43fdc --- /dev/null +++ b/packages/sdk/src/pooling/logRotation.ts @@ -0,0 +1,259 @@ +// logRotation.ts +import { readdirSync, writeFileSync, existsSync, unlinkSync, mkdirSync } from 'node:fs'; +import { join } from 'node:path'; + + +/** + * Reset worker logs in the given directory. 
+ * mode: + * - "truncate": empty files but keep them (best if tails are open) + * - "delete": remove files entirely (simplest if no tails yet) + * + * @param dir - Directory to reset logs in + * @param mode - 'truncate' or 'delete' + */ +function resetWorkerLogs(dir: string, mode: 'truncate' | 'delete'): void { + const patterns = [ + /worker-\d+\.log$/, + /worker-\d+\.out\.log$/, + /worker-\d+\.err\.log$/, + /worker-\d+\.warn\.log$/, + /worker-\d+\.info\.log$/, + ]; + for (const name of readdirSync(dir)) { + // eslint-disable-next-line no-continue + if (!patterns.some((rx) => rx.test(name))) continue; + const p = join(dir, name); + try { + if (mode === 'delete' && existsSync(p)) unlinkSync(p); + else writeFileSync(p, ''); + } catch { + /* ignore */ + } + } + process.stdout.write( + `Logs have been ${mode === 'delete' ? 'deleted' : 'truncated'} in ${dir}\n`, + ); +} + +/** + * Very robust classification of a single log line into warn/error. + * Returns 'warn' | 'error' | null (null = not a level we care to badge). + * + * @param line - Single line of log output to classify + * @returns 'warn' | 'error' | null + */ +export function classifyLogLevel(line: string): 'warn' | 'error' | null { + // Strip common ANSI sequences + // eslint-disable-next-line no-control-regex + const s = line.replace(/\x1B\[[0-9;]*m/g, ''); + + // 1) Explicit worker tag: "[w12] WARN ..." or "[w2] ERROR ..." + const mTag = /\[w\d+\]\s+(ERROR|WARN)\b/i.exec(s); + if (mTag) return mTag[1]!.toLowerCase() as 'warn' | 'error'; + + // 2) Common plain prefixes + if (/^\s*(ERROR|ERR|FATAL)\b/i.test(s)) return 'error'; + if (/^\s*(WARN|WARNING)\b/.test(s)) return 'warn'; + + // Node runtime warnings + if (/^\s*\(node:\d+\)\s*Warning:/i.test(s)) return 'warn'; + if (/^\s*DeprecationWarning:/i.test(s)) return 'warn'; + + // 3) JSON logs (pino/bunyan/etc.) 
+ // Try to parse as JSON and inspect `level` + try { + const j = JSON.parse(s); + const lv = j?.level; + if (typeof lv === 'number') { + // pino levels: 40=warn, 50=error, 60=fatal + if (lv >= 50) return 'error'; + if (lv >= 40) return 'warn'; + } else if (typeof lv === 'string') { + const L = lv.toLowerCase(); + if (L === 'error' || L === 'fatal') return 'error'; + if (L === 'warn' || L === 'warning') return 'warn'; + } + } catch { + // not JSON, ignore + } + + // 4) Fallthrough: look for level words inside worker-tagged lines + // e.g. "[w3] something WARNING xyz" + const mInline = /\[w\d+\].*\b(WARN|WARNING|ERROR|FATAL)\b/i.exec(s); + if (mInline) { + const L = mInline[1]!.toUpperCase(); + return L === 'ERROR' || L === 'FATAL' ? 'error' : 'warn'; + } + + return null; +} + +/** + * Stream splitter to get whole lines from 'data' events + * + * @param onLine - Callback to call with each complete line + * @returns A function that processes a chunk of data and calls onLine for each complete line + */ +export function makeLineSplitter(onLine: (line: string) => void): (chunk: Buffer | string) => void { + let buf = ''; + return (chunk: Buffer | string) => { + buf += chunk.toString('utf8'); + let nl: number; + // eslint-disable-next-line no-cond-assign + while ((nl = buf.indexOf('\n')) !== -1) { + const line = buf.slice(0, nl); + onLine(line); + buf = buf.slice(nl + 1); + } + }; +} +/** + * Checks if a log line contains an error indicator. + * + * @param t - The log line to check + * @returns True if the line contains an error keyword, false otherwise + */ +export function isLogError(t: string): boolean { + return /\b(ERROR|uncaughtException|unhandledRejection)\b/i.test(t); +} + +/** + * Checks if a log line contains a warning indicator. 
+ * + * @param t - The log line to check + * @returns True if the line contains a warning keyword, false otherwise + */ +export function isLogWarn(t: string): boolean { + return /\b(WARN|WARNING)\b/i.test(t); +} + +/** + * Determines if a log line is a new header (error, warning, worker tag, or ISO timestamp). + * + * @param t - The log line to check + * @returns True if the line is a new header, false otherwise + */ +export function isLogNewHeader(t: string): boolean { + return ( + isLogError(t) || + isLogWarn(t) || + /^\s*\[w\d+\]/.test(t) || + /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/.test(t) + ); +} + +// eslint-disable-next-line no-control-regex +const stripAnsi = (s: string): string => s.replace(/\x1B\[[0-9;]*m/g, ''); + +/** + * Extracts blocks of text from a larger body of text. + * + * @param text - The text to extract blocks from + * @param starts - A function that determines if a line starts a new block + * @returns An array of extracted blocks + */ +export function extractBlocks(text: string, starts: (cleanLine: string) => boolean): string[] { + if (!text) return []; + const out: string[] = []; + const lines = text.split('\n'); + let buf: string[] = []; + let inBlock = false; + + const flush = (): void => { + if (buf.length) out.push(buf.join('\n')); + buf = []; + inBlock = false; + }; + + for (const raw of lines) { + const clean = stripAnsi(raw || ''); + const headery = isLogNewHeader(clean); + if (!inBlock) { + if (starts(clean)) { + inBlock = true; + buf.push(raw); + } + // eslint-disable-next-line no-continue + continue; + } + if (!raw || headery) { + flush(); + if (starts(clean)) { + inBlock = true; + buf.push(raw); + } + } else { + buf.push(raw); + } + } + flush(); + return out.filter(Boolean); +} + +/** + * The kind of export artifact to retrieve the path for. 
+ */ +export type LogExportKind = 'error' | 'warn' | 'info' | 'all'; + +/** + * Ensure log directory exists + * + * @param rootDir - Root directory + * @returns log dir + */ +export function initLogDir(rootDir: string): string { + const logDir = join(rootDir, 'logs'); + mkdirSync(logDir, { recursive: true }); + + // FIXME + const RESET_MODE = (process.env.RESET_LOGS as 'truncate' | 'delete') ?? 'truncate'; + resetWorkerLogs(logDir, RESET_MODE); + + return logDir; +} + +export interface ExportArtifactResult { + /** Whether the artifact was opened successfully */ + ok?: boolean; + /** The absolute path to the export artifact */ + path: string; + /** Time saved */ + savedAt?: number; + /** If exported */ + exported?: boolean; +} + +/** + * Status map for export artifacts. + */ +export type ExportStatusMap = { + /** The absolute paths to the error log artifacts */ + error?: ExportArtifactResult; + /** The absolute paths to the warn log artifacts */ + warn?: ExportArtifactResult; + /** The absolute paths to the info log artifacts */ + info?: ExportArtifactResult; + /** The absolute paths to all log artifacts */ + all?: ExportArtifactResult; + /** The absolute paths to the failures CSV artifacts */ + failuresCsv?: ExportArtifactResult; +}; + +/** + * Return export statuses + * + * FIXME what is this for? 
+ * + * @param receiptsFolder - Receipts directory + * @returns Export map + */ +export function buildExportStatus(receiptsFolder: string): ExportStatusMap { + return { + error: { path: join(receiptsFolder, 'combined-errors.log') }, + warn: { path: join(receiptsFolder, 'combined-warns.log') }, + info: { path: join(receiptsFolder, 'combined-info.log') }, + all: { path: join(receiptsFolder, 'combined-all.log') }, + failuresCsv: { path: join(receiptsFolder, 'failing-updates.csv') }, + }; +} diff --git a/packages/sdk/src/pooling/runPool.ts b/packages/sdk/src/pooling/runPool.ts new file mode 100644 index 00000000..641cfe8a --- /dev/null +++ b/packages/sdk/src/pooling/runPool.ts @@ -0,0 +1,662 @@ +import type { ChildProcess } from 'node:child_process'; + +import type { ObjByString } from '@transcend-io/type-utils'; +import { RateCounter } from '@transcend-io/utils'; +/* eslint-disable max-lines */ + +import { classifyLogLevel, initLogDir, makeLineSplitter } from './logRotation.js'; +import { safeGetLogPathsForSlot } from './safeGetLogPathsForSlot.js'; +import { + getWorkerLogPaths, + isIpcOpen, + safeSend, + spawnWorkerProcess, + type WorkerLogPaths, +} from './spawnWorkerProcess.js'; +import type { SlotState, FromWorker, ToWorker } from './types.js'; + +/** + * Callbacks used by the generic pool orchestrator to: + * - fetch tasks, + * - format labels for UI, + * - fold progress and results into aggregate totals, + * - run optional post-processing once the pool completes. + * + * Each command supplies concrete `TTask`, `TProg`, `TRes`, and optionally a + * custom totals type `TTotals`. + */ +export interface PoolHooks< + TTask extends ObjByString, + TProg extends ObjByString, + TRes extends ObjByString, + TTotals = unknown, +> { + /** + * Produce the next work item for a slot. + * + * @returns The next task or `undefined` if no tasks remain. + */ + nextTask: () => TTask | undefined; + + /** + * Human-readable label for a task, shown in dashboards. 
+ * + * @param t - The task to label. + * @returns A short descriptor, typically a file path or identifier. + */ + taskLabel: (t: TTask) => string; + + /** + * Fold an incoming progress payload into aggregate totals. + * Should be pure (no side effects) and return the new totals object. + * + * @param prevTotals - The previous totals value. + * @param prog - The latest progress payload from a worker. + * @returns Updated totals. + */ + onProgress: (prevTotals: TTotals, prog: TProg) => TTotals; + + /** + * Handle a final result from a worker. + * Should be pure and return the new totals plus a boolean indicating if the + * unit succeeded (used to set per-slot level/metrics). + * + * @param prevTotals - The previous totals value. + * @param res - The result payload from a worker. + * @returns Object containing updated totals and success flag. + */ + onResult: ( + prevTotals: TTotals, + res: TRes, + ) => { + /** Updated totals after processing this result */ + totals: TTotals; + /** Whether the task was successful */ + ok: boolean; + }; + + /** + * Initialize per-slot progress state when a task is assigned. + * Useful when you want a non-undefined `progress` immediately. + * + * @param t - The task to be started in this slot. + * @returns Initial progress state or `undefined`. + */ + initSlotProgress?: (t: TTask) => TProg | undefined; + + /** + * Produce the initial totals value for the pool (defaults to `{}`). + * + * @returns A new totals object. + */ + initTotals?: () => TTotals; + + /** + * Provide an export status map for dashboards (optional). + * + * @returns A status object or `undefined` if not applicable. + */ + exportStatus?: () => Record | undefined; + + /** + * Optional post-processing step invoked after the pool finishes. + * Common use: writing combined logs/artifacts once all workers complete. 
+ * + * When {@link RunPoolOptions.viewerMode} is enabled, the runner also passes + * the **log directory** and the **per-slot log file paths** so you can + * replicate the legacy “viewer mode” auto-exports (combined logs, indices, etc.). + */ + postProcess?: (ctx: { + /** Live snapshot of all worker slots at completion. */ + slots: Map>; + /** Final aggregate totals. */ + totals: TTotals; + /** Absolute path to the pool’s log directory. */ + logDir: string; + /** + * Mapping of slot id -> log paths (stdout/stderr/current, rotations may exist). + * Use this to collect and export artifacts after completion. + */ + logsBySlot: Map; + /** Unix millis when the pool started (first worker spawned). */ + startedAt: number; + /** Unix millis when the pool fully completed (after last worker exit). */ + finishedAt: number; + /** + * Helper to safely re-fetch a slot’s current log paths, accounting for respawns. + * Mirrors the dashboard’s attach/switcher behavior. + */ + getLogPathsForSlot: (id: number) => WorkerLogPaths | undefined; + /** True if the pool was run in viewerMode (non-interactive). */ + viewerMode: boolean; + }) => Promise | void; +} + +/** + * Options to run a generic worker pool. + * + * @template TTask - The payload sent to each worker as a "task". + * @template TProg - The progress payload emitted by workers. + * @template TRes - The result payload emitted by workers. + * @template TTotals - The aggregate totals object maintained by hooks. + */ +export interface RunPoolOptions< + TTask extends ObjByString, + TProg extends ObjByString, + TRes extends ObjByString, + TTotals extends ObjByString, +> { + /** Human-readable name for the pool, shown in headers (e.g., "Parallel uploader", "Chunk CSV"). */ + title: string; + + /** + * Directory for pool-local state (logs, discovery messages, artifacts). + * Usually the CLI's working directory for the command. 
+ */ + baseDir: string; + + /** Absolute path of the module the child should execute (the command impl that calls runChild when CHILD_FLAG is present). */ + childModulePath: string; + + /** + * Number of worker processes to spawn. Typically derived via a helper like `computePoolSize`. + */ + poolSize: number; + + /** Logical CPU count used for display only (not required to equal `poolSize`). */ + cpuCount: number; + + /** + * Flag that the child module expects to see in `process.argv` to run in "worker" mode. + * This MUST match the flag the worker module checks (e.g., `--as-child`). + */ + childFlag: string; + + /** + * Renderer function injected by the command. The runner calls this on each "tick" + * and on significant state changes (progress, completion, attach/detach). + */ + render: (input: { + /** Header/title for the UI. */ + title: string; + /** Configured pool size (number of workers). */ + poolSize: number; + /** CPU count for informational display. */ + cpuCount: number; + /** Total number of files/tasks anticipated by the command. */ + filesTotal: number; + /** Number of files/tasks that have produced a successful result so far. */ + filesCompleted: number; + /** Number of files/tasks that have produced a failed result so far. */ + filesFailed: number; + /** + * Per-slot state for each worker, including busy flag, file label, start time, + * last log level badge, and optional progress payload. + */ + workerState: Map>; + /** + * Arbitrary totals object maintained by hooks. This is the primary place to surface + * domain-specific aggregate metrics in the UI. 
+ */ + totals: TTotals; + /** + * Smoothed throughput metrics computed by the runner: + * - successSoFar: convenience mirror of completed count for the renderer + * - r10s: moving average of completions per second over ~10 seconds + * - r60s: moving average of completions per second over ~60 seconds + */ + throughput: { + /** Convenience mirror of `filesCompleted` for renderers that expect it in this block. */ + successSoFar: number; + /** Moving average file completions/sec (10s window). */ + r10s: number; + /** Moving average file completions/sec (60s window). */ + r60s: number; + /** Moving average job/record completions/sec (10s window). */ + jobsR10s: number; + /** Moving average job/record completions/sec (60s window). */ + jobsR60s: number; + }; + /** True when the pool has fully drained and all workers have exited. */ + final: boolean; + /** + * Optional export status payload surfaced by hooks; used by commands that generate + * multiple artifact files and want to show "latest paths" in the UI. + */ + exportStatus?: Record; + }) => void; + + /** + * Hook suite that adapts the pool to a specific command: + * - nextTask(): TTask | undefined + * - taskLabel(task): string + * - initTotals?(): TTotals + * - initSlotProgress?(task): TProg + * - onProgress(totals, prog): TTotals + * - onResult(totals, res): { totals: TTotals; ok: boolean } + * - postProcess?({ slots, totals, logDir, logsBySlot, ... }): Promise | void + * - exportStatus?(): Record + */ + hooks: PoolHooks; + + /** + * Total number of "files" or logical items the command expects to process. + * Used purely for UI/ETA; does not affect scheduling. + */ + filesTotal: number; + + /** Optional callback to open log tail windows per worker (CLI passes openLogTailWindowMulti) */ + onLogFilesCreated?: (paths: string[], label: string, isSilent: boolean) => void; + + /** Silence worker stdio (except logs). 
*/ + isSilent?: boolean; + + /** + * When true, run in “viewer mode” (non-interactive): + * - Do NOT install the interactive attach/switcher. + * - Default `openLogWindows` to false. + * - Still render on a timer. + * - Provide `logDir`/`logsBySlot` to `postProcess` for auto-exports. + */ + viewerMode?: boolean; + + /** + * Optional factory for additional key bindings (e.g., log viewers/exports). + * Only used when viewerMode === false. + */ + extraKeyHandler?: (args: { + /** per-slot log paths (kept up-to-date across respawns) */ + logsBySlot: Map; + /** re-render dashboard now */ + repaint: () => void; + /** pause/unpause dashboard repaint while showing viewers */ + setPaused: (p: boolean) => void; + }) => (buf: Buffer) => void; + + /** + * Optional interactive stdin handler installer (CLI passes installInteractiveSwitcher). + * When not provided, interactive mode is disabled regardless of viewerMode. + */ + installInteractiveSwitcher?: (args: { + /** Map of worker slot IDs to their ChildProcess instances */ + workers: Map; + /** Called when a user attaches to a worker slot */ + onAttach: (id: number) => void; + /** Called when a user detaches from a worker slot */ + onDetach: () => void; + /** Called on Ctrl+C */ + onCtrlC: () => void; + /** Get log paths for a slot */ + getLogPaths: (id: number) => WorkerLogPaths | undefined; + /** Bytes to replay when attaching */ + replayBytes: number; + /** Which log streams to replay */ + replayWhich: ('out' | 'err')[]; + /** Called when entering attach screen */ + onEnterAttachScreen: (id: number) => void; + }) => () => void; +} + +/** + * Run a multi-process worker pool for a command. + * The runner owns: spawning workers, assigning tasks, collecting progress/results, + * basic log badging (WARN/ERROR), an interactive attach/switcher (unless viewerMode), + * and a render loop. + * + * The command injects "hooks" to customize scheduling and totals aggregation. 
+ * + * @param opts - Options + */ +export async function runPool< + TTask extends ObjByString, + TProg extends ObjByString, + TRes extends ObjByString, + TTotals extends ObjByString, +>(opts: RunPoolOptions): Promise { + const { + title, + baseDir, + poolSize, + cpuCount, + render, + childModulePath, + hooks, + filesTotal, + childFlag, + viewerMode = false, + } = opts; + + const isSilent = opts.isSilent ?? true; + + const startedAt = Date.now(); + const logDir = initLogDir(baseDir); + + /** Live worker processes keyed by slot id. */ + const workers = new Map(); + /** Per-slot state tracked for the UI and scheduling. */ + const workerState = new Map>(); + /** File paths for each worker’s stdout/stderr logs. */ + const slotLogs = new Map(); + /** File-completion throughput meter. */ + const meter = new RateCounter(); + /** Job/record-level throughput meter (fed from progress.processed deltas). */ + const jobMeter = new RateCounter(); + /** Last-seen `processed` count per worker slot, used to compute deltas. */ + const lastProcessed = new Map(); + const totalsInit = (hooks.initTotals?.() ?? {}) as TTotals; + + let totalsBox = totalsInit; + let activeWorkers = 0; + let completed = 0; + let failed = 0; + + // Repaint ticker starts on first READY to avoid double-first-render. + let ticker: NodeJS.Timeout | null = null; + let firstReady = false; + // Gate repaint during popup viewers/exports (driven by extraKeyHandler). + let paused = false; + // Keep a reference so we can unbind on exit. + let extraHandler: ((buf: Buffer) => void) | null = null; + + /** + * Paint the UI. The renderer is intentionally pure and receives + * a snapshot of current state. + * + * @param final - If true, render the final state and exit. 
+ */ + const repaint = (final = false): void => { + if (paused) return; + render({ + title, + poolSize, + cpuCount, + filesTotal, + filesCompleted: completed, + filesFailed: failed, + workerState, + totals: totalsBox, + final, + exportStatus: hooks.exportStatus?.(), + throughput: { + successSoFar: completed, + r10s: meter.rate(10_000), + r60s: meter.rate(60_000), + jobsR10s: jobMeter.rate(10_000), + jobsR60s: jobMeter.rate(60_000), + }, + }); + }; + + /** + * Assign the next task to `id` if available. + * + * @param id - The worker slot id to assign a task to. + * @returns true if a task was assigned. + * + * NOTE: This is the critical fix. We **do not** "peek & put back" a task. + * We only consume via `nextTask()` inside this function. + */ + const assign = (id: number): boolean => { + const task = hooks.nextTask(); + if (!task) return false; + + const child = workers.get(id)!; + const label = hooks.taskLabel(task); + const initialProg = hooks.initSlotProgress?.(task); + + workerState.set(id, { + busy: true, + file: label, + startedAt: Date.now(), + lastLevel: 'ok', + progress: initialProg, + }); + + safeSend(child, { type: 'task', payload: task } as ToWorker); + repaint(); + return true; + }; + + /* Spawn workers */ + for (let i = 0; i < poolSize; i += 1) { + const child = spawnWorkerProcess({ + id: i, + modulePath: childModulePath, + logDir, + isSilent, + childFlag, + onLogFilesCreated: opts.onLogFilesCreated, + }); + workers.set(i, child); + workerState.set(i, { + busy: false, + file: null, + startedAt: null, + lastLevel: 'ok', + }); + slotLogs.set(i, getWorkerLogPaths(child)); + activeWorkers += 1; + + // badge WARN/ERROR quickly from stderr + const errLine = makeLineSplitter((line) => { + const lvl = classifyLogLevel(line); + if (!lvl) return; + const prev = workerState.get(i)!; + if (prev.lastLevel !== lvl) { + workerState.set(i, { ...prev, lastLevel: lvl }); + repaint(); + } + }); + child.stderr?.on('data', errLine); + + // messages from the worker + // 
eslint-disable-next-line no-loop-func + child.on('message', (msg: FromWorker) => { + if (!msg || typeof msg !== 'object') return; + + if (msg.type === 'ready') { + if (!firstReady) { + firstReady = true; + ticker = setInterval(() => repaint(false), 350); + } + assign(i); // try to start work immediately + return; + } + + if (msg.type === 'progress') { + totalsBox = hooks.onProgress(totalsBox, msg.payload); + const prev = workerState.get(i)!; + workerState.set(i, { ...prev, progress: msg.payload }); + + // Feed job-level meter from progress.processed deltas + const payload = msg.payload as Record; + if (typeof payload?.processed === 'number') { + const prevCount = lastProcessed.get(i) ?? 0; + const delta = payload.processed - prevCount; + if (delta > 0) jobMeter.add(delta); + lastProcessed.set(i, payload.processed); + } + + repaint(); + return; + } + + if (msg.type === 'result') { + const prev = workerState.get(i)!; + const { totals: t2, ok } = hooks.onResult(totalsBox, msg.payload); + totalsBox = t2; + + if (ok) { + completed += 1; + meter.add(1); + } else { + failed += 1; + } + + workerState.set(i, { + ...prev, + busy: false, + file: null, + progress: undefined, + lastLevel: ok ? 'ok' : 'error', + }); + lastProcessed.delete(i); + + // Just try to assign; if none left, shut this child down. 
+ if (!assign(i) && isIpcOpen(child)) { + safeSend(child, { type: 'shutdown' } as ToWorker); + } + repaint(); + } + }); + + // eslint-disable-next-line no-loop-func + child.on('exit', () => { + activeWorkers -= 1; + if (activeWorkers === 0) { + if (ticker) clearInterval(ticker); + repaint(true); + } + }); + } + + /* Interactive attach/switcher */ + let cleanupSwitcher: () => void = () => { + /* noop */ + // no-op by default, overridden in non-viewerMode + }; + + const tearDownStdin = (): void => { + try { + process.stdin.setRawMode?.(false); + } catch { + /* noop */ + } + try { + process.stdin.pause(); + } catch { + /* noop */ + } + }; + + const onSigint = (): void => { + if (ticker) clearInterval(ticker); + cleanupSwitcher?.(); + if (extraHandler) { + try { + process.stdin.off('data', extraHandler); + } catch { + /* noop */ + } + } + tearDownStdin(); + + process.stdout.write('\nStopping workers...\n'); + for (const [, w] of workers) { + if (isIpcOpen(w)) safeSend(w, { type: 'shutdown' } as ToWorker); + try { + w?.kill('SIGTERM'); + } catch { + /* noop */ + } + } + process.exit(130); + }; + + const onAttach = (id: number): void => { + paused = true; // stop dashboard repaint while attached/viewing + process.stdout.write('\x1b[2J\x1b[H'); // clear + home + process.stdout.write( + `Attached to worker ${id}. 
(Esc/Ctrl+] detach • Ctrl+D EOF • Ctrl+C SIGINT)\n`, + ); + }; + const onDetach = (): void => { + paused = false; + repaint(); + }; + + process.once('SIGINT', onSigint); + + if (!viewerMode) { + if (process.stdin.isTTY) { + try { + process.stdin.setRawMode(true); + } catch { + process.stdout.write( + 'Warning: Unable to enable raw mode for interactive key handling.\n', + ); + } + process.stdin.resume(); // keep stdin flowing (no encoding — raw Buffer) + } + + if (opts.installInteractiveSwitcher) { + cleanupSwitcher = opts.installInteractiveSwitcher({ + workers, + onAttach, + onDetach, + onCtrlC: onSigint, + getLogPaths: (id) => safeGetLogPathsForSlot(id, workers, slotLogs), + replayBytes: 200 * 1024, + replayWhich: ['out', 'err'], + onEnterAttachScreen: onAttach, + }); + } + + if (opts.extraKeyHandler) { + extraHandler = opts.extraKeyHandler({ + logsBySlot: slotLogs, + repaint: () => repaint(), + setPaused: (p) => { + paused = p; + }, + }); + process.stdin.on('data', extraHandler); + } + } + + /* Wait for full completion, then post-process (with log context if needed). */ + await new Promise((resolve) => { + const check = setInterval(async () => { + if (activeWorkers === 0) { + clearInterval(check); + if (ticker) clearInterval(ticker); + cleanupSwitcher(); + + if (extraHandler) { + try { + process.stdin.off('data', extraHandler); + } catch { + /* noop */ + } + } + tearDownStdin(); + + const finishedAt = Date.now(); + + try { + await hooks.postProcess?.({ + slots: workerState, + totals: totalsBox, + logDir, + logsBySlot: slotLogs, + startedAt, + finishedAt, + viewerMode, + getLogPathsForSlot: (id: number) => safeGetLogPathsForSlot(id, workers, slotLogs), + }); + } catch (err: unknown) { + const msg = + ( + err as { + /** Error stack */ + stack?: string; + } + )?.stack ?? 
String(err); + process.stdout.write(`postProcess error: ${msg}\n`); + } + resolve(); + } + }, 300); + }); +} +/* eslint-enable max-lines */ diff --git a/packages/sdk/src/pooling/safeGetLogPathsForSlot.ts b/packages/sdk/src/pooling/safeGetLogPathsForSlot.ts new file mode 100644 index 00000000..221b76be --- /dev/null +++ b/packages/sdk/src/pooling/safeGetLogPathsForSlot.ts @@ -0,0 +1,28 @@ +import type { ChildProcess } from 'node:child_process'; + +import { getWorkerLogPaths, isIpcOpen, type WorkerLogPaths } from './spawnWorkerProcess.js'; + +/** + * Safely retrieve log paths for a worker slot. + * + * @param id - The worker slot ID + * @param workers - Map of worker IDs to their ChildProcess instances + * @param slotLogPaths - Map of worker IDs to their log paths + * @returns The log paths for the worker slot, or undefined if not available + */ +export function safeGetLogPathsForSlot( + id: number, + workers: Map, + slotLogPaths: Map, +): WorkerLogPaths | undefined { + const live = workers.get(id); + if (isIpcOpen(live)) { + try { + const p = getWorkerLogPaths(live!); + if (p !== undefined && p !== null) return p; + } catch { + /* fall back */ + } + } + return slotLogPaths.get(id); +} diff --git a/packages/sdk/src/pooling/spawnWorkerProcess.ts b/packages/sdk/src/pooling/spawnWorkerProcess.ts new file mode 100644 index 00000000..15633d1a --- /dev/null +++ b/packages/sdk/src/pooling/spawnWorkerProcess.ts @@ -0,0 +1,215 @@ +import { fork, type ChildProcess } from 'node:child_process'; +import { createWriteStream } from 'node:fs'; +import { join } from 'node:path'; + +import { ensureLogFile } from './ensureLogFile.js'; +import { classifyLogLevel, makeLineSplitter } from './logRotation.js'; + +/** Default child-flag used if a caller doesn’t provide one. 
*/ +export const CHILD_FLAG = '--as-child'; + +// Symbol key so we can stash/retrieve paths on the child proc safely +const LOG_PATHS_SYM: unique symbol = Symbol('workerLogPaths'); + +export interface WorkerLogPaths { + /** Structured (app-controlled) log file path written via WORKER_LOG */ + structuredPath: string; + /** Raw stdout capture */ + outPath: string; + /** Raw stderr capture */ + errPath: string; + /** Lines classified as INFO (primarily stdout) */ + infoPath: string; + /** Lines classified as WARN (from stderr without error tokens) */ + warnPath: string; + /** Lines classified as ERROR (from stderr, including uncaught) */ + errorPath: string; +} + +/** Convenience alias for the optional return from getWorkerLogPaths */ +export type SlotPaths = Map; + +/** + * Retrieve the paths we stashed on the child. + * + * @param child - The worker ChildProcess instance. + * @returns The log paths or undefined if not set. + */ +export function getWorkerLogPaths(child: ChildProcess): WorkerLogPaths | undefined { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return (child as any)[LOG_PATHS_SYM] as WorkerLogPaths | undefined; +} + +/** + * Is IPC channel still open? (Node doesn't type `.channel`) + * + * @param w - The worker ChildProcess instance. + * @returns True if the IPC channel is open, false otherwise. + */ +export function isIpcOpen(w: ChildProcess | undefined | null): boolean { + const ch = w && w.channel; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return !!(w && w.connected && ch && !(ch as any).destroyed); +} + +/** + * Safely send a message to the worker process. + * + * @param w - The worker ChildProcess instance. + * @param msg - The message to send. + * @returns True if the message was sent successfully, false otherwise. 
+ */ +export function safeSend(w: ChildProcess, msg: unknown): boolean { + if (!isIpcOpen(w)) return false; + try { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + w.send?.(msg as any); + return true; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (err: any) { + if (err?.code === 'ERR_IPC_CHANNEL_CLOSED' || err?.code === 'EPIPE' || err?.errno === -32) { + return false; + } + throw err; + } +} + +export interface SpawnWorkerOptions { + /** Worker slot/index */ + id: number; + /** Absolute path to the module to fork (should handle CHILD_FLAG) */ + modulePath: string; + /** Directory where log files will be written */ + logDir: string; + /** If true, spawn with silent stdio (respect your existing setting) */ + isSilent: boolean; + /** Optional override for the child flag (defaults to CHILD_FLAG) */ + childFlag?: string; + /** Optional callback to open log tail windows (CLI-specific) */ + onLogFilesCreated?: (paths: string[], label: string, isSilent: boolean) => void; +} + +/** + * Spawn a worker process with piped stdio and persisted logs. + * + * Files produced per worker: + * - worker-{id}.log (structured WORKER_LOG written by the child) + * - worker-{id}.out.log (raw stdout) + * - worker-{id}.err.log (raw stderr) + * - worker-{id}.info.log (classified INFO lines from stdout) + * - worker-{id}.warn.log (classified WARN lines from stderr) + * - worker-{id}.error.log (classified ERROR lines from stderr) + * + * @param opts - Options for spawning the worker process. + * @returns The spawned ChildProcess instance. 
+ */ +export function spawnWorkerProcess(opts: SpawnWorkerOptions): ChildProcess { + const { id, modulePath, logDir, isSilent, childFlag = CHILD_FLAG, onLogFilesCreated } = opts; + + const structuredPath = join(logDir, `worker-${id}.log`); + const outPath = join(logDir, `worker-${id}.out.log`); + const errPath = join(logDir, `worker-${id}.err.log`); + const infoPath = join(logDir, `worker-${id}.info.log`); + const warnPath = join(logDir, `worker-${id}.warn.log`); + const errorPath = join(logDir, `worker-${id}.error.log`); + + [structuredPath, outPath, errPath, infoPath, warnPath, errorPath].forEach(ensureLogFile); + + const child = fork(modulePath, [childFlag], { + stdio: ['pipe', 'pipe', 'pipe', 'ipc'], + env: { ...process.env, WORKER_ID: String(id), WORKER_LOG: structuredPath }, + execArgv: process.execArgv, + silent: isSilent, + }); + + // Raw capture streams + const outStream = createWriteStream(outPath, { flags: 'a' }); + const errStream = createWriteStream(errPath, { flags: 'a' }); + + // Classified streams + const infoStream = createWriteStream(infoPath, { flags: 'a' }); + const warnStream = createWriteStream(warnPath, { flags: 'a' }); + const errorStream = createWriteStream(errorPath, { flags: 'a' }); + + // Pipe raw streams + child.stdout?.pipe(outStream); + child.stderr?.pipe(errStream); + + // Headers so tail windows show something immediately + const hdr = (name: string): string => + `[parent] ${name} capture active for w${id} (pid ${child.pid})\n`; + outStream.write(hdr('stdout')); + errStream.write(hdr('stderr')); + infoStream.write(hdr('info')); + warnStream.write(hdr('warn')); + errorStream.write(hdr('error')); + + // Classified INFO from stdout (line-buffered) + if (child.stdout) { + const onOutLine = makeLineSplitter((line) => { + if (!line) return; + try { + // Treat all stdout lines as INFO for the classified stream + infoStream.write(`${line}\n`); + } catch { + /* ignore */ + } + }); + child.stdout.on('data', onOutLine); + } + + // Classified 
WARN/ERROR from stderr (line-buffered) + if (child.stderr) { + const onErrLine = makeLineSplitter((line) => { + if (!line) return; + const lvl = classifyLogLevel(line); // 'warn' | 'error' | null + try { + if (lvl === 'error') { + errorStream.write(`${line}\n`); + } else { + // Treat untagged stderr as WARN by default (common in libs) + warnStream.write(`${line}\n`); + } + } catch { + /* ignore */ + } + }); + child.stderr.on('data', onErrLine); + } + + // Stash log path metadata on the child + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (child as any)[LOG_PATHS_SYM] = { + structuredPath, + outPath, + errPath, + infoPath, + warnPath, + errorPath, + } as WorkerLogPaths; + + onLogFilesCreated?.( + [structuredPath, outPath, errPath, infoPath, warnPath, errorPath], + `worker-${id}`, + isSilent, + ); + + // Best-effort error suppression on file streams + outStream.on('error', () => { + /* ignore */ + }); + errStream.on('error', () => { + /* ignore */ + }); + infoStream.on('error', () => { + /* ignore */ + }); + warnStream.on('error', () => { + /* ignore */ + }); + errorStream.on('error', () => { + /* ignore */ + }); + + return child; +} diff --git a/packages/sdk/src/pooling/types.ts b/packages/sdk/src/pooling/types.ts new file mode 100644 index 00000000..2f5fd071 --- /dev/null +++ b/packages/sdk/src/pooling/types.ts @@ -0,0 +1,63 @@ +import type { ObjByString } from '@transcend-io/type-utils'; + +/** Minimal per-slot state the runner keeps */ +export type PoolLevel = 'ok' | 'warn' | 'error'; + +export interface SlotState { + /** True if the worker is currently processing a task */ + busy: boolean; + /** The file being processed by the worker */ + file: string | null; + /** Timestamp when the worker started processing the task */ + startedAt: number | null; + /** Current log level of the worker */ + lastLevel: PoolLevel; + /** Progress */ + progress?: TProg; +} + +/** Message sent by a worker indicating it is ready to receive tasks. 
*/ +export type WorkerReady = { + /** Type ready */ + type: 'ready'; +}; + +/** Message sent by a worker with a progress payload. */ +export type WorkerProgress = { + /** Discriminant. */ + type: 'progress'; + /** Implementation-defined progress payload. */ + payload: TProg; +}; + +/** Message sent by a worker with a final result payload for a single unit. */ +export type WorkerResult = { + /** Discriminant. */ + type: 'result'; + /** Implementation-defined result payload. */ + payload: TRes; +}; + +/** Union of all Worker → Parent messages. */ +export type FromWorker = WorkerReady | WorkerProgress | WorkerResult; + +/** + * Message sent by the parent to a worker to signal shutdown. + */ +export type ShutdownEvent = { + /** Shutdown */ + type: 'shutdown'; +}; + +/** + * Message sent by the parent to a worker to assign a task. + */ +export type TaskEvent = { + /** Task */ + type: 'task'; + /** Payload */ + payload: TTask; +}; + +/** Messages the parent can send to a worker. */ +export type ToWorker = ShutdownEvent | TaskEvent; diff --git a/packages/sdk/src/preference-management/getPreferenceIdentifiersFromRow.ts b/packages/sdk/src/preference-management/getPreferenceIdentifiersFromRow.ts new file mode 100644 index 00000000..21286f01 --- /dev/null +++ b/packages/sdk/src/preference-management/getPreferenceIdentifiersFromRow.ts @@ -0,0 +1,34 @@ +import type { PreferenceStoreIdentifier } from '@transcend-io/privacy-types'; + +import type { FileFormatState } from './codecs.js'; + +/** + * Extract preference store identifiers from a CSV row based on the column-to-identifier mapping. 
+ * + * @param options - Options + * @returns Array of identifiers for the preference store API + */ +export function getPreferenceIdentifiersFromRow({ + row, + columnToIdentifier, +}: { + /** The current row from CSV file */ + row: Record; + /** The current file metadata state */ + columnToIdentifier: FileFormatState['columnToIdentifier']; +}): PreferenceStoreIdentifier[] { + const identifiers = Object.entries(columnToIdentifier) + .filter(([col]) => !!row[col]) + .map(([col, identifierMapping]) => ({ + name: identifierMapping.name, + value: row[col]!, + })); + return identifiers.sort( + (a, b) => + (a.name === 'email' ? -1 : 0) - (b.name === 'email' ? -1 : 0) || + a.name.localeCompare(b.name, undefined, { sensitivity: 'base' }), + ); +} + +/** Sentinel value indicating no timestamp/format column was selected */ +export const NONE_PREFERENCE_MAP = '[NONE]'; diff --git a/packages/sdk/src/preference-management/index.ts b/packages/sdk/src/preference-management/index.ts index e8ab78f7..b9dffd9a 100644 --- a/packages/sdk/src/preference-management/index.ts +++ b/packages/sdk/src/preference-management/index.ts @@ -5,6 +5,7 @@ export * from './createPreferenceAccessTokens.js'; export * from './types.js'; export * from './codecs.js'; export * from './getPreferenceMetadataFromRow.js'; +export * from './getPreferenceIdentifiersFromRow.js'; export * from './getPreferenceUpdatesFromRow.js'; export * from './checkIfPendingPreferenceUpdatesAreNoOp.js'; export * from './checkIfPendingPreferenceUpdatesCauseConflict.js'; diff --git a/packages/sdk/src/preference-upload/batchUploader.ts b/packages/sdk/src/preference-upload/batchUploader.ts new file mode 100644 index 00000000..6d4f467b --- /dev/null +++ b/packages/sdk/src/preference-upload/batchUploader.ts @@ -0,0 +1,116 @@ +import type { PreferenceUpdateItem } from '@transcend-io/privacy-types'; +import { + extractErrorMessage, + getErrorStatus, + retrySamePromise, + splitInHalf, + type Logger, + type RetryPolicy, +} from 
'@transcend-io/utils'; + +type Entry = [string, PreferenceUpdateItem]; + +export interface BatchUploadPreferenceOptions { + /** When true - don't trigger workflow runs */ + skipWorkflowTriggers: boolean; +} + +export interface BatchUploaderDeps { + /** Network transport used for PUT uploads */ + putBatch: ( + updates: PreferenceUpdateItem[], + opts: BatchUploadPreferenceOptions, + ) => Promise; + /** Retry policy for retryable statuses */ + retryPolicy: RetryPolicy; + /** Endpoint behavior flags */ + options: BatchUploadPreferenceOptions; + /** Decide if a status is retryable *in place* (no splitting) */ + isRetryableStatus: (status?: number) => boolean; + /** Logger */ + logger: Logger; +} + +/** + * Upload a batch of entries with retry + split fallback. + * + * Orchestrates the per-chunk upload flow with: + * 1) Whole-batch attempt + * 2) In-place retries for retryable statuses + * 3) Recursive splitting for non-retryable errors (down to singletons) + * + * @param entries - Array of [primaryKey, update] pairs + * @param deps - Injected transport + policy + logger + * @param callbacks - Callback functions + */ +export async function uploadChunkWithSplit( + entries: Entry[], + deps: BatchUploaderDeps, + callbacks: { + /** Callback invoked after a successful upload of `entries` */ + onSuccess: (entries: Entry[]) => Promise; + /** Callback for single-entry failure terminal case */ + onFailureSingle: (entry: Entry, err: unknown) => Promise; + /** Callback for terminal failure of the entire batch */ + onFailureBatch: (entries: Entry[], err: unknown) => Promise; + }, +): Promise { + const { logger } = deps; + + const putAll = (): Promise => + deps.putBatch( + entries.map(([, u]) => u), + deps.options, + ); + + try { + await putAll(); + await callbacks.onSuccess(entries); + } catch (errRaw) { + let err = errRaw; + const status = getErrorStatus(err); + const msg = extractErrorMessage(err); + + const isSoftRateLimit = + status === 400 && + /slow down|please try again 
shortly|Throughput exceeds the current/i.test(msg); + + if (deps.isRetryableStatus(status) || isSoftRateLimit) { + try { + await retrySamePromise(putAll, deps.retryPolicy, (note) => + logger.warn(note), + ); + await callbacks.onSuccess(entries); + return; + } catch (err2) { + if (deps.isRetryableStatus(getErrorStatus(err2))) { + logger.error( + `Exhausted retries for batch of ${entries.length}. Marking entire batch as failed.`, + ); + await callbacks.onFailureBatch(entries, err2); + return; + } + err = err2; + } + } + + if (entries.length === 1) { + try { + await putAll(); + await callbacks.onSuccess(entries); + } catch (singleErr) { + await callbacks.onFailureSingle(entries[0]!, singleErr); + } + return; + } + + const [left, right] = splitInHalf(entries); + logger.warn( + `Non-retryable failure for batch of ${entries.length} (status=${status}): ${msg}. ` + + `Splitting into ${left.length} and ${right.length}.`, + ); + + await uploadChunkWithSplit(left, deps, callbacks); + await uploadChunkWithSplit(right, deps, callbacks); + } +} diff --git a/packages/sdk/src/preference-upload/buildPendingUpdates.ts b/packages/sdk/src/preference-upload/buildPendingUpdates.ts new file mode 100644 index 00000000..3490b62c --- /dev/null +++ b/packages/sdk/src/preference-upload/buildPendingUpdates.ts @@ -0,0 +1,140 @@ +import type { PreferenceUpdateItem } from '@transcend-io/privacy-types'; + +import type { + ColumnIdentifierMap, + ColumnMetadataMap, + ColumnPurposeMap, + PendingSafePreferenceUpdates, + PendingWithConflictPreferenceUpdates, +} from '../preference-management/codecs.js'; +import { + getPreferenceIdentifiersFromRow, + NONE_PREFERENCE_MAP, +} from '../preference-management/getPreferenceIdentifiersFromRow.js'; +import { getPreferenceMetadataFromRow } from '../preference-management/getPreferenceMetadataFromRow.js'; +import { getPreferenceUpdatesFromRow } from '../preference-management/getPreferenceUpdatesFromRow.js'; +import type { PreferenceTopic } from 
'../preference-management/fetchAllPreferenceTopics.js'; +import type { Purpose } from '../preference-management/fetchAllPurposes.js'; + +/** Attribute key-value pair for workflow settings */ +export interface FormattedAttribute { + /** Attribute key */ + key: string; + /** Attribute values */ + values: string[]; +} + +export interface BuildPendingParams { + /** Safe updates keyed by user/primaryKey */ + safe: PendingSafePreferenceUpdates; + /** Conflict updates keyed by user/primaryKey (value.row contains row data) */ + conflicts: PendingWithConflictPreferenceUpdates; + /** Only upload safe updates (ignore conflicts entirely) */ + skipConflictUpdates: boolean; + /** Name of the column to use as the preference timestamp (if available) */ + timestampColumn?: string; + /** CSV column -> purpose/preference mapping */ + columnToPurposeName: ColumnPurposeMap; + /** CSV column -> identifier mapping */ + columnToIdentifier: ColumnIdentifierMap; + /** CSV column -> metadata key mapping (optional) */ + columnToMetadata?: ColumnMetadataMap; + /** Full set of preference topics for resolving row -> preference values */ + preferenceTopics: PreferenceTopic[]; + /** Full set of purposes for resolving slugs/trackingTypes */ + purposes: Purpose[]; + /** Partition to attribute to every record */ + partition: string; + /** Static attributes injected into workflow settings */ + workflowAttrs: FormattedAttribute[]; + /** If true, downstream should avoid user-visible notifications */ + isSilent: boolean; + /** If true, skip triggering workflows downstream */ + skipWorkflowTriggers: boolean; + /** If true, force trigger workflows even if preferences haven't changed */ + forceTriggerWorkflows: boolean; +} + +/** + * Convert parsed CSV rows into a map of PreferenceUpdateItem payloads. + * + * This function is pure (no IO, logging or state writes). 
+ * + * @param params - Transformation inputs + * @returns Map of primaryKey -> PreferenceUpdateItem + */ +export function buildPendingUpdates( + params: BuildPendingParams, +): Record { + const { + safe, + conflicts, + skipConflictUpdates, + timestampColumn, + columnToPurposeName, + columnToIdentifier, + columnToMetadata, + preferenceTopics, + purposes, + partition, + workflowAttrs, + isSilent, + skipWorkflowTriggers, + forceTriggerWorkflows, + } = params; + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const merged: Record = skipConflictUpdates + ? { ...safe } + : { + ...safe, + ...Object.fromEntries( + Object.entries(conflicts).map(([id, v]) => [id, v.row]), + ), + }; + + const purposeSlugs = purposes.map((x) => x.trackingType); + const out: Record = {}; + + for (const [userId, row] of Object.entries(merged)) { + const ts = + timestampColumn === NONE_PREFERENCE_MAP || !timestampColumn + ? new Date() + : new Date(row[timestampColumn]); + + const updates = getPreferenceUpdatesFromRow({ + row, + columnToPurposeName, + preferenceTopics, + purposeSlugs, + }); + + const identifiers = getPreferenceIdentifiersFromRow({ + row, + columnToIdentifier, + }); + + const metadata = columnToMetadata + ? getPreferenceMetadataFromRow({ row, columnToMetadata }) + : undefined; + + out[userId] = { + identifiers, + partition, + timestamp: ts.toISOString(), + purposes: Object.entries(updates).map(([purpose, value]) => ({ + ...value, + purpose, + workflowSettings: { + attributes: workflowAttrs, + isSilent, + skipWorkflowTrigger: skipWorkflowTriggers, + forceTriggerWorkflow: forceTriggerWorkflows, + }, + })), + ...(metadata && metadata.length > 0 ? 
{ metadata } : {}), + }; + } + + return out; +} diff --git a/packages/sdk/src/preference-upload/index.ts b/packages/sdk/src/preference-upload/index.ts new file mode 100644 index 00000000..8e6f18c6 --- /dev/null +++ b/packages/sdk/src/preference-upload/index.ts @@ -0,0 +1,4 @@ +export * from './progress.js'; +export * from './batchUploader.js'; +export * from './loadReferenceData.js'; +export * from './buildPendingUpdates.js'; diff --git a/packages/sdk/src/preference-upload/loadReferenceData.ts b/packages/sdk/src/preference-upload/loadReferenceData.ts new file mode 100644 index 00000000..bdd46fb3 --- /dev/null +++ b/packages/sdk/src/preference-upload/loadReferenceData.ts @@ -0,0 +1,37 @@ +import type { Logger } from '@transcend-io/utils'; +import type { GraphQLClient } from 'graphql-request'; + +import { fetchAllIdentifiers, type Identifier } from '../data-inventory/fetchAllIdentifiers.js'; +import { + fetchAllPreferenceTopics, + type PreferenceTopic, +} from '../preference-management/fetchAllPreferenceTopics.js'; +import { fetchAllPurposes, type Purpose } from '../preference-management/fetchAllPurposes.js'; + +export interface PreferenceUploadReferenceData { + /** List of purposes in the organization */ + purposes: Purpose[]; + /** List of preference topics in the organization */ + preferenceTopics: PreferenceTopic[]; + /** List of identifiers in the organization */ + identifiers: Identifier[]; +} + +/** + * Load all required reference data for an upload run. 
+ * + * @param client - GraphQL client + * @param options - Options + * @returns Reference data arrays + */ +export async function loadReferenceData( + client: GraphQLClient, + { logger }: { logger: Logger }, +): Promise { + const [purposes, preferenceTopics, identifiers] = await Promise.all([ + fetchAllPurposes(client, { logger }), + fetchAllPreferenceTopics(client, { logger }), + fetchAllIdentifiers(client, { logger }), + ]); + return { purposes, preferenceTopics, identifiers }; +} diff --git a/packages/sdk/src/preference-upload/progress.ts b/packages/sdk/src/preference-upload/progress.ts new file mode 100644 index 00000000..31761a7b --- /dev/null +++ b/packages/sdk/src/preference-upload/progress.ts @@ -0,0 +1,56 @@ +import type { PreferenceUploadProgress } from '../preference-management/types.js'; + +/** Per-file summary emitted when a file finishes processing */ +export interface FileProgressInfo { + /** File path or identifier */ + file: string; + /** Number of safe (non-conflicting) records uploaded */ + safeCount: number; + /** Number of conflicting records uploaded */ + conflictCount: number; + /** Number of records skipped (already in sync) */ + skippedCount: number; + /** Number of records that failed to upload */ + failedCount: number; + /** Total records in the file */ + totalRecords: number; +} + +/** + * Structured progress reporting interface for preference uploads. 
+ * + * Consumers implement this to receive machine-readable progress updates: + * - CLI: writes receipts + updates terminal dashboard + * - Container: writes progress.json for Retool, posts to Transcend API + * - Agent: emits structured events + */ +export interface UploadProgressSink { + /** Called when a file starts processing */ + onFileStart(file: string, totalRecords: number): void; + /** Called periodically as records are uploaded */ + onFileProgress(file: string, progress: PreferenceUploadProgress): void; + /** Called when a file finishes (success or partial) */ + onFileComplete(file: string, info: FileProgressInfo): void; + /** Called on non-fatal errors (e.g. skipped identifier) */ + onError(file: string, error: string): void; + /** Called when the entire job finishes */ + onJobComplete(summary: { + /** Total files processed */ + totalFiles: number; + /** Total records across all files */ + totalRecords: number; + /** Wall-clock time in milliseconds */ + elapsedMs: number; + /** Per-file summaries */ + filesCompleted: FileProgressInfo[]; + }): void; +} + +/** No-op sink for when progress reporting isn't needed */ +export const noopProgressSink: UploadProgressSink = { + onFileStart: () => {}, + onFileProgress: () => {}, + onFileComplete: () => {}, + onError: () => {}, + onJobComplete: () => {}, +}; From 80600760b0071831870be0eb9ba71ea06369c7fd Mon Sep 17 00:00:00 2001 From: Michael Farrell Date: Sat, 28 Mar 2026 23:05:25 -0700 Subject: [PATCH 07/10] Delete CLI duplicates of moved SDK modules, rewire all imports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upload pipeline: - Delete CLI batchUploader.ts, loadReferenceData.ts, buildPendingUpdates.ts, types.ts (all moved to SDK preference-upload/) - Clean up upload/index.ts and transform/index.ts barrels - Move loadReferenceData.test.ts to SDK package Pooling: - Delete CLI runPool.ts, types.ts, spawnWorkerProcess.ts, logRotation.ts, ensureLogFile.ts, 
safeGetLogPathsForSlot.ts (all moved to SDK pooling/) - Update CLI pooling barrel to only export CLI-specific files - Rewire 15+ consumer files to split imports: SDK items from @transcend-io/sdk, CLI items from CLI pooling barrel Note: Some pooling tests need migration to SDK (mock targets reference deleted local modules). This is a known follow-up. SDK typecheck ✓ | CLI typecheck ✓ Made-with: Cursor --- .../cli/src/commands/admin/chunk-csv/impl.ts | 5 +- .../admin/chunk-csv/tests/impl.test.ts | 2 +- .../src/commands/admin/chunk-csv/worker.ts | 2 +- .../src/commands/admin/parquet-to-csv/impl.ts | 5 +- .../admin/parquet-to-csv/tests/impl.test.ts | 13 +- .../commands/admin/parquet-to-csv/worker.ts | 2 +- .../artifacts/ExportManager.ts | 7 +- .../artifacts/artifactAbsPath.ts | 2 +- .../artifacts/tests/ExportManager.test.ts | 9 +- .../artifacts/tests/writeExportsIndex.test.ts | 2 +- .../artifacts/writeExportsIndex.ts | 2 +- .../consent/upload-preferences/impl.ts | 7 +- .../upload/batchUploader.ts | 129 ---- .../upload/buildInteractiveUploadPlan.ts | 2 +- .../upload-preferences/upload/index.ts | 9 - .../interactivePreferenceUploaderFromPlan.ts | 7 +- .../upload/loadReferenceData.ts | 49 -- .../upload/tests/loadReferenceData.test.ts | 97 --- .../upload/transform/buildPendingUpdates.ts | 141 ---- .../upload/transform/index.ts | 6 +- .../upload-preferences/upload/types.ts | 1 - .../consent/upload-preferences/worker.ts | 8 +- .../src/lib/pooling/createExtraKeyHandler.ts | 4 +- .../cli/src/lib/pooling/dashboardPlugin.ts | 2 +- packages/cli/src/lib/pooling/ensureLogFile.ts | 13 - .../cli/src/lib/pooling/exportCombinedLogs.ts | 2 +- packages/cli/src/lib/pooling/index.ts | 7 - .../lib/pooling/installInteractiveSwitcher.ts | 2 +- packages/cli/src/lib/pooling/logRotation.ts | 260 ------- packages/cli/src/lib/pooling/runPool.ts | 641 ------------------ .../src/lib/pooling/safeGetLogPathsForSlot.ts | 28 - .../cli/src/lib/pooling/showCombinedLogs.ts | 2 +- 
.../cli/src/lib/pooling/spawnWorkerProcess.ts | 218 ------ .../pooling/tests/buildExportStatus.test.ts | 2 +- .../pooling/tests/classifyLogLevel.test.ts | 2 +- .../tests/createExtraKeyHandler.test.ts | 3 +- .../lib/pooling/tests/ensureLogFile.test.ts | 2 +- .../lib/pooling/tests/extractBlocks.test.ts | 2 +- .../pooling/tests/getWorkerLogPaths.test.ts | 13 +- .../src/lib/pooling/tests/initLogDir.test.ts | 2 +- .../tests/installInteractiveSwitcher.test.ts | 2 +- .../src/lib/pooling/tests/isIpcOpen.test.ts | 2 +- .../src/lib/pooling/tests/isLogError.test.ts | 2 +- .../lib/pooling/tests/isLogNewHeader.test.ts | 2 +- .../src/lib/pooling/tests/isLogWarn.test.ts | 2 +- .../pooling/tests/makeLineSplitter.test.ts | 2 +- .../cli/src/lib/pooling/tests/runPool.test.ts | 50 +- .../tests/safeGetLogPathsForSlot.test.ts | 20 +- .../src/lib/pooling/tests/safeSend.test.ts | 2 +- .../pooling/tests/showCombinedLogs.test.ts | 2 +- .../pooling/tests/spawnWorkerProcess.test.ts | 32 +- .../src/lib/pooling/tests/uiPlugins.test.ts | 2 +- packages/cli/src/lib/pooling/types.ts | 63 -- .../loadReferenceData.test.ts | 65 ++ 54 files changed, 195 insertions(+), 1765 deletions(-) delete mode 100644 packages/cli/src/commands/consent/upload-preferences/upload/batchUploader.ts delete mode 100644 packages/cli/src/commands/consent/upload-preferences/upload/loadReferenceData.ts delete mode 100644 packages/cli/src/commands/consent/upload-preferences/upload/tests/loadReferenceData.test.ts delete mode 100644 packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts delete mode 100644 packages/cli/src/commands/consent/upload-preferences/upload/types.ts delete mode 100644 packages/cli/src/lib/pooling/ensureLogFile.ts delete mode 100644 packages/cli/src/lib/pooling/logRotation.ts delete mode 100644 packages/cli/src/lib/pooling/runPool.ts delete mode 100644 packages/cli/src/lib/pooling/safeGetLogPathsForSlot.ts delete mode 100644 
packages/cli/src/lib/pooling/spawnWorkerProcess.ts delete mode 100644 packages/cli/src/lib/pooling/types.ts create mode 100644 packages/sdk/src/preference-upload/loadReferenceData.test.ts diff --git a/packages/cli/src/commands/admin/chunk-csv/impl.ts b/packages/cli/src/commands/admin/chunk-csv/impl.ts index 16620b83..8ad13c8e 100644 --- a/packages/cli/src/commands/admin/chunk-csv/impl.ts +++ b/packages/cli/src/commands/admin/chunk-csv/impl.ts @@ -3,12 +3,11 @@ import colors from 'colors'; import type { LocalContext } from '../../../context.js'; import { doneInputValidation } from '../../../lib/cli/done-input-validation.js'; import { collectCsvFilesOrExit } from '../../../lib/helpers/collectCsvFilesOrExit.js'; +import { CHILD_FLAG, type PoolHooks, runPool } from '@transcend-io/sdk'; + import { computePoolSize, createExtraKeyHandler, - CHILD_FLAG, - type PoolHooks, - runPool, dashboardPlugin, } from '../../../lib/pooling/index.js'; import { logger } from '../../../logger.js'; diff --git a/packages/cli/src/commands/admin/chunk-csv/tests/impl.test.ts b/packages/cli/src/commands/admin/chunk-csv/tests/impl.test.ts index d722b609..b17afb6b 100644 --- a/packages/cli/src/commands/admin/chunk-csv/tests/impl.test.ts +++ b/packages/cli/src/commands/admin/chunk-csv/tests/impl.test.ts @@ -23,7 +23,7 @@ const H = vi.hoisted(() => { poolSize?: number; cpuCount?: number; filesTotal?: number; - hooks?: import('../../../../lib/pooling/index.js').PoolHooks< + hooks?: import('@transcend-io/sdk').PoolHooks< ChunkTask, ChunkProgress, ChunkResult, diff --git a/packages/cli/src/commands/admin/chunk-csv/worker.ts b/packages/cli/src/commands/admin/chunk-csv/worker.ts index cd0688c5..e1fea8e2 100644 --- a/packages/cli/src/commands/admin/chunk-csv/worker.ts +++ b/packages/cli/src/commands/admin/chunk-csv/worker.ts @@ -1,6 +1,6 @@ import { chunkOneCsvFile, extractErrorMessage } from '@transcend-io/utils'; -import type { ToWorker } from '../../../lib/pooling/index.js'; +import type { ToWorker } 
from '@transcend-io/sdk'; import { logger } from '../../../logger.js'; /** diff --git a/packages/cli/src/commands/admin/parquet-to-csv/impl.ts b/packages/cli/src/commands/admin/parquet-to-csv/impl.ts index 395405f4..219e1209 100644 --- a/packages/cli/src/commands/admin/parquet-to-csv/impl.ts +++ b/packages/cli/src/commands/admin/parquet-to-csv/impl.ts @@ -3,12 +3,11 @@ import colors from 'colors'; import type { LocalContext } from '../../../context.js'; import { doneInputValidation } from '../../../lib/cli/done-input-validation.js'; import { collectParquetFilesOrExit } from '../../../lib/helpers/index.js'; +import { CHILD_FLAG, type PoolHooks, runPool } from '@transcend-io/sdk'; + import { computePoolSize, createExtraKeyHandler, - CHILD_FLAG, - type PoolHooks, - runPool, dashboardPlugin, } from '../../../lib/pooling/index.js'; import { logger } from '../../../logger.js'; diff --git a/packages/cli/src/commands/admin/parquet-to-csv/tests/impl.test.ts b/packages/cli/src/commands/admin/parquet-to-csv/tests/impl.test.ts index 970fecea..84aa5317 100644 --- a/packages/cli/src/commands/admin/parquet-to-csv/tests/impl.test.ts +++ b/packages/cli/src/commands/admin/parquet-to-csv/tests/impl.test.ts @@ -1,7 +1,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import type { LocalContext } from '../../../../context.js'; -import type { PoolHooks } from '../../../../lib/pooling/index.js'; +import type { PoolHooks } from '@transcend-io/sdk'; import { parquetToCsv, type ParquetToCsvCommandFlags } from '../impl.js'; import { parquetToCsvPlugin } from '../ui/index.js'; import type { ParquetTask, ParquetProgress, ParquetResult } from '../worker.js'; @@ -106,6 +106,15 @@ vi.mock('../../../../lib/helpers/index.js', () => ({ * IMPORTANT: mock the exact module id after resolution. Using the absolute path * to the actual file from *this test file* is reliable for Vitest. 
*/ +vi.mock('@transcend-io/sdk', async () => { + const actual = await vi.importActual('@transcend-io/sdk'); + return { + ...actual, + CHILD_FLAG: H.pooling.CHILD_FLAG, + runPool: H.pooling.runPool, + }; +}); + vi.mock('../../../../lib/pooling/index.js', async () => { const actual = await vi.importActual( @@ -113,9 +122,7 @@ vi.mock('../../../../lib/pooling/index.js', async () => { ); return { ...actual, - CHILD_FLAG: H.pooling.CHILD_FLAG, computePoolSize: H.pooling.computePoolSize, - runPool: H.pooling.runPool, dashboardPlugin: H.pooling.dashboardPlugin, createExtraKeyHandler: H.pooling.createExtraKeyHandler, }; diff --git a/packages/cli/src/commands/admin/parquet-to-csv/worker.ts b/packages/cli/src/commands/admin/parquet-to-csv/worker.ts index 2a1c15b7..da92e32b 100644 --- a/packages/cli/src/commands/admin/parquet-to-csv/worker.ts +++ b/packages/cli/src/commands/admin/parquet-to-csv/worker.ts @@ -1,7 +1,7 @@ import { extractErrorMessage } from '@transcend-io/utils'; import { parquetToCsvOneFile } from '../../../lib/helpers/index.js'; -import type { ToWorker } from '../../../lib/pooling/index.js'; +import type { ToWorker } from '@transcend-io/sdk'; import { logger } from '../../../logger.js'; export type ParquetTask = { diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/ExportManager.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/ExportManager.ts index e1a24d10..c7840080 100644 --- a/packages/cli/src/commands/consent/upload-preferences/artifacts/ExportManager.ts +++ b/packages/cli/src/commands/consent/upload-preferences/artifacts/ExportManager.ts @@ -4,14 +4,17 @@ import { resolve } from 'node:path'; import { readSafe } from '../../../../lib/helpers/index.js'; import { - copyToClipboard, - openPath, extractBlocks, isLogError, isLogWarn, type ExportArtifactResult, type ExportStatusMap, type LogExportKind, +} from '@transcend-io/sdk'; + +import { + copyToClipboard, + openPath, revealInFileManager, } from 
'../../../../lib/pooling/index.js'; import { artifactAbsPath, type ExportKindWithCsv } from './artifactAbsPath.js'; diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/artifactAbsPath.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/artifactAbsPath.ts index 1ef49236..4602b823 100644 --- a/packages/cli/src/commands/consent/upload-preferences/artifacts/artifactAbsPath.ts +++ b/packages/cli/src/commands/consent/upload-preferences/artifacts/artifactAbsPath.ts @@ -1,6 +1,6 @@ import { join, resolve } from 'node:path'; -import type { LogExportKind } from '../../../../lib/pooling/index.js'; +import type { LogExportKind } from '@transcend-io/sdk'; export interface ExportArtifactStatus { /** The absolute path to the export artifact */ diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/ExportManager.test.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/ExportManager.test.ts index 2f87e638..cfb77473 100644 --- a/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/ExportManager.test.ts +++ b/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/ExportManager.test.ts @@ -43,13 +43,16 @@ vi.mock('../artifactAbsPath.js', () => ({ ), })); +vi.mock('@transcend-io/sdk', () => ({ + extractBlocks: H.fns.extractBlocks, + isLogError: vi.fn(() => false), + isLogWarn: vi.fn(() => false), +})); + vi.mock('../../../../../lib/pooling/index.js', () => ({ copyToClipboard: H.fns.copyToClipboard, openPath: H.fns.openPath, revealInFileManager: H.fns.revealInFileManager, - extractBlocks: H.fns.extractBlocks, - isLogError: vi.fn(() => false), - isLogWarn: vi.fn(() => false), })); vi.mock('../../../../../lib/helpers/index.js', () => ({ diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/writeExportsIndex.test.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/writeExportsIndex.test.ts index 3f34bc70..9873a4ac 100644 
--- a/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/writeExportsIndex.test.ts +++ b/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/writeExportsIndex.test.ts @@ -3,7 +3,7 @@ import * as nodeUrl from 'node:url'; import { describe, it, expect, vi, beforeEach } from 'vitest'; -import type { ExportStatusMap } from '../../../../../lib/pooling/index.js'; +import type { ExportStatusMap } from '@transcend-io/sdk'; import { writeExportsIndex } from '../writeExportsIndex.js'; /** diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/writeExportsIndex.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/writeExportsIndex.ts index c27240db..1e0b4d75 100644 --- a/packages/cli/src/commands/consent/upload-preferences/artifacts/writeExportsIndex.ts +++ b/packages/cli/src/commands/consent/upload-preferences/artifacts/writeExportsIndex.ts @@ -3,7 +3,7 @@ import { mkdirSync, writeFileSync } from 'node:fs'; import { join } from 'node:path'; import { pathToFileURL } from 'node:url'; -import type { ExportStatusMap } from '../../../../lib/pooling/index.js'; +import type { ExportStatusMap } from '@transcend-io/sdk'; import { artifactAbsPath, type ExportKindWithCsv } from './artifactAbsPath.js'; let lastIndexFileContents = ''; diff --git a/packages/cli/src/commands/consent/upload-preferences/impl.ts b/packages/cli/src/commands/consent/upload-preferences/impl.ts index 649e7e9b..f698ca0f 100644 --- a/packages/cli/src/commands/consent/upload-preferences/impl.ts +++ b/packages/cli/src/commands/consent/upload-preferences/impl.ts @@ -8,12 +8,15 @@ import type { LocalContext } from '../../../context.js'; import { doneInputValidation } from '../../../lib/cli/done-input-validation.js'; import { collectCsvFilesOrExit } from '../../../lib/helpers/collectCsvFilesOrExit.js'; import { - computePoolSize, CHILD_FLAG, type PoolHooks, runPool, - dashboardPlugin, buildExportStatus, +} from '@transcend-io/sdk'; + +import { + 
computePoolSize, + dashboardPlugin, createExtraKeyHandler, } from '../../../lib/pooling/index.js'; import { logger } from '../../../logger.js'; diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/batchUploader.ts b/packages/cli/src/commands/consent/upload-preferences/upload/batchUploader.ts deleted file mode 100644 index d3b4f574..00000000 --- a/packages/cli/src/commands/consent/upload-preferences/upload/batchUploader.ts +++ /dev/null @@ -1,129 +0,0 @@ -import type { PreferenceUpdateItem } from '@transcend-io/privacy-types'; -import { - extractErrorMessage, - getErrorStatus, - retrySamePromise, - splitInHalf, - type RetryPolicy, -} from '@transcend-io/utils'; -import colors from 'colors'; - -import { logger } from '../../../../logger.js'; - -type Entry = [string, PreferenceUpdateItem]; - -export interface BatchUploadPreferenceOptions { - /** When true - don't trigger workflow runs */ - skipWorkflowTriggers: boolean; -} - -export interface BatchUploaderDeps { - /** Network transport used for PUT uploads */ - putBatch: ( - /** The set of updates to put */ - updates: PreferenceUpdateItem[], - /** The global options for each update */ - opts: BatchUploadPreferenceOptions, - ) => Promise; - /** Retry policy for retryable statuses */ - retryPolicy: RetryPolicy; - /** Endpoint behavior flags */ - options: BatchUploadPreferenceOptions; - /** Decide if a status is retryable *in place* (no splitting) */ - isRetryableStatus: (status?: number) => boolean; -} - -/** - * Upload a batch of entries with retry + split fallback. 
- * - * Orchestrates the per-chunk upload flow with: - * 1) Whole-batch attempt - * 2) In-place retries for retryable statuses - * 3) Recursive splitting for non-retryable errors (down to singletons) - * - * @param entries - Array of [primaryKey, update] pairs - * @param deps - Injected transport + policy + logger - * @param callbacks - Callback functions - */ -export async function uploadChunkWithSplit( - entries: Entry[], - deps: BatchUploaderDeps, - callbacks: { - /** Callback invoked after a successful upload of `entries` */ - onSuccess: (entries: Entry[]) => Promise; - /** Callback for single-entry failure terminal case */ - onFailureSingle: (entry: Entry, err: unknown) => Promise; - /** Callback for terminal failure of the entire batch */ - onFailureBatch: (entries: Entry[], err: unknown) => Promise; - }, -): Promise { - // Run the batch job - const putAll = (): Promise => - deps.putBatch( - entries.map(([, u]) => u), - deps.options, - ); - - try { - // 1) Try the whole batch once. - await putAll(); - await callbacks.onSuccess(entries); - } catch (errRaw) { - let err = errRaw; - const status = getErrorStatus(err); - const msg = extractErrorMessage(err); - - // 2) For retryable statuses, attempt in-place retries without splitting. - const isSoftRateLimit = - // FIXME - status === 400 && - /slow down|please try again shortly|Throughput exceeds the current/i.test(msg); - - if (deps.isRetryableStatus(status) || isSoftRateLimit) { - try { - await retrySamePromise(putAll, deps.retryPolicy, (note) => - logger.warn(colors.yellow(note)), - ); - await callbacks.onSuccess(entries); - return; - } catch (err2) { - // If we *still* have a retryable status after exhausting attempts, - // mark the entire batch as failed (do NOT split). - if (deps.isRetryableStatus(getErrorStatus(err2))) { - logger.error( - colors.red( - `Exhausted retries for batch of ${entries.length}. 
Marking entire batch as failed.`, - ), - ); - await callbacks.onFailureBatch(entries, err2); - return; - } - // Otherwise, fall through to split behavior with the new error. - err = err2; - } - } - - // 3) Non-retryable path: split the batch and recurse down to singletons. - if (entries.length === 1) { - // Terminal case: one record left and it still fails → mark failure. - try { - await putAll(); - await callbacks.onSuccess(entries); - } catch (singleErr) { - await callbacks.onFailureSingle(entries[0], singleErr); - } - return; - } - - const [left, right] = splitInHalf(entries); - logger.warn( - colors.yellow( - `Non-retryable failure for batch of ${entries.length} (status=${status}): ${msg}. ` + - `Splitting into ${left.length} and ${right.length}.`, - ), - ); - - await uploadChunkWithSplit(left, deps, callbacks); - await uploadChunkWithSplit(right, deps, callbacks); - } -} diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts b/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts index f09fe5e1..a345254f 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/buildInteractiveUploadPlan.ts @@ -3,6 +3,7 @@ import { type FileFormatState, type PendingSafePreferenceUpdates, type PendingWithConflictPreferenceUpdates, + type PreferenceUploadProgress, type PreferenceUploadReferenceData, type SkippedPreferenceUpdates, } from '@transcend-io/sdk'; @@ -18,7 +19,6 @@ import { parseAttributesFromString, readCsv } from '../../../../lib/requests/ind import { logger } from '../../../../logger.js'; import { type PreferenceReceiptsInterface } from '../artifacts/receipts/receiptsState.js'; import { type PreferenceSchemaInterface } from '../schemaState.js'; -import type { PreferenceUploadProgress } from './types.js'; export interface InteractiveUploadPreferencePlan { /** CSV file path to 
load preference records from */ diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/index.ts b/packages/cli/src/commands/consent/upload-preferences/upload/index.ts index 8713710a..1d484547 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/index.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/index.ts @@ -1,11 +1,2 @@ -export * from './types.js'; -export { - loadReferenceData, - type PreferenceUploadReferenceData, - uploadChunkWithSplit, - type BatchUploaderDeps, - type BatchUploadPreferenceOptions, -} from '@transcend-io/sdk'; export * from './buildInteractiveUploadPlan.js'; -export * from './transform/index.js'; export * from './interactivePreferenceUploaderFromPlan.js'; diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts b/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts index 3a4e3507..571ed0a0 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/interactivePreferenceUploaderFromPlan.ts @@ -9,9 +9,12 @@ import { chunk, groupBy } from 'lodash-es'; import { RETRYABLE_BATCH_STATUSES } from '../../../../constants.js'; import { logger } from '../../../../logger.js'; import type { PreferenceReceiptsInterface } from '../artifacts/receipts/index.js'; -import { uploadChunkWithSplit, buildPendingUpdates } from '@transcend-io/sdk'; +import { + uploadChunkWithSplit, + buildPendingUpdates, + type PreferenceUploadProgress, +} from '@transcend-io/sdk'; import type { InteractiveUploadPreferencePlan } from './buildInteractiveUploadPlan.js'; -import type { PreferenceUploadProgress } from './types.js'; const { map: pMap } = Bluebird; diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/loadReferenceData.ts 
b/packages/cli/src/commands/consent/upload-preferences/upload/loadReferenceData.ts deleted file mode 100644 index 13f68e46..00000000 --- a/packages/cli/src/commands/consent/upload-preferences/upload/loadReferenceData.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { - buildTranscendGraphQLClient, - fetchAllIdentifiers, - fetchAllPurposes, - fetchAllPreferenceTopics, - type Identifier, - type PreferenceTopic, - type Purpose, -} from '@transcend-io/sdk'; -import type { GraphQLClient } from 'graphql-request'; - -import { logger } from '../../../../logger.js'; - -export type PreferenceUploadReferenceData = { - /** - * List of purposes in the organization - */ - purposes: Purpose[]; - /** - * List of preference topics in the organization - */ - preferenceTopics: PreferenceTopic[]; - /** - * List of identifiers in the organization - */ - identifiers: Identifier[]; -}; - -/** - * Load all required reference data for an upload run. - * - * @param client - GraphQL client - * @returns GraphQL client and reference data arrays - */ -export async function loadReferenceData(client: GraphQLClient): Promise< - { - /** - * GraphQL client to use for making requests - */ - client: ReturnType; - } & PreferenceUploadReferenceData -> { - const [purposes, preferenceTopics, identifiers] = await Promise.all([ - fetchAllPurposes(client, { logger }), - fetchAllPreferenceTopics(client, { logger }), - fetchAllIdentifiers(client, { logger }), - ]); - return { client, purposes, preferenceTopics, identifiers }; -} diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/tests/loadReferenceData.test.ts b/packages/cli/src/commands/consent/upload-preferences/upload/tests/loadReferenceData.test.ts deleted file mode 100644 index 09edceb3..00000000 --- a/packages/cli/src/commands/consent/upload-preferences/upload/tests/loadReferenceData.test.ts +++ /dev/null @@ -1,97 +0,0 @@ -import type { Identifier, PreferenceTopic, Purpose } from '@transcend-io/sdk'; -import type { GraphQLClient } from 
'graphql-request'; -import { describe, it, expect, vi, beforeEach } from 'vitest'; - -// Shared mocks (we’ll reset them each test) -const mFetchAllPurposes = vi.fn(); -const mFetchAllPreferenceTopics = vi.fn(); -const mFetchAllIdentifiers = vi.fn(); - -// Helper: after resetting modules, install the mocks, then import SUT fresh -async function importSut(): Promise<{ - loadReferenceData: (typeof import('../loadReferenceData.js'))['loadReferenceData']; -}> { - // Mock BEFORE importing the SUT - vi.mock('@transcend-io/sdk', () => ({ - fetchAllPurposes: mFetchAllPurposes, - fetchAllPreferenceTopics: mFetchAllPreferenceTopics, - fetchAllIdentifiers: mFetchAllIdentifiers, - })); - - const mod = await import('../loadReferenceData.js'); - return { - loadReferenceData: - mod.loadReferenceData as (typeof import('../loadReferenceData.js'))['loadReferenceData'], - }; -} - -describe('loadReferenceData', () => { - let client: GraphQLClient; - - beforeEach(() => { - vi.resetModules(); // ensure a clean module graph so mocks stick - - // IMPORTANT: reset implementations + once-queues between tests - mFetchAllPurposes.mockReset(); - mFetchAllPreferenceTopics.mockReset(); - mFetchAllIdentifiers.mockReset(); - - // Minimal safe stub - client = { - request: vi.fn().mockResolvedValue({}), - } as unknown as GraphQLClient; - }); - - it('loads purposes, topics, and identifiers', async () => { - const { loadReferenceData } = await importSut(); - - const purposes = [{ id: 'p1' }, { id: 'p2' }] as Purpose[]; - const preferenceTopics = [{ id: 't1' }] as PreferenceTopic[]; - const identifiers = [{ id: 'i1' }, { id: 'i2' }] as Identifier[]; - - mFetchAllPurposes.mockResolvedValueOnce(purposes); - mFetchAllPreferenceTopics.mockResolvedValueOnce(preferenceTopics); - mFetchAllIdentifiers.mockResolvedValueOnce(identifiers); - - const result = await loadReferenceData(client); - - expect(result.client).toBe(client); - expect(result.purposes).toEqual(purposes); - 
expect(result.preferenceTopics).toEqual(preferenceTopics); - expect(result.identifiers).toEqual(identifiers); - - expect(mFetchAllPurposes).toHaveBeenCalledTimes(1); - expect(mFetchAllPurposes).toHaveBeenCalledWith( - client, - expect.objectContaining({ logger: expect.anything() }), - ); - - expect(mFetchAllPreferenceTopics).toHaveBeenCalledTimes(1); - expect(mFetchAllPreferenceTopics).toHaveBeenCalledWith( - client, - expect.objectContaining({ logger: expect.anything() }), - ); - - expect(mFetchAllIdentifiers).toHaveBeenCalledTimes(1); - expect(mFetchAllIdentifiers).toHaveBeenCalledWith( - client, - expect.objectContaining({ logger: expect.anything() }), - ); - }); - - it('propagates errors (e.g., identifiers fetch fails)', async () => { - const { loadReferenceData } = await importSut(); - - const err = new Error('boom'); - - mFetchAllPurposes.mockResolvedValueOnce([{ id: 'p' }] as Purpose[]); - mFetchAllPreferenceTopics.mockResolvedValueOnce([{ id: 't' }] as PreferenceTopic[]); - mFetchAllIdentifiers.mockRejectedValueOnce(err); - - await expect(loadReferenceData(client)).rejects.toBe(err); - - expect(mFetchAllPurposes).toHaveBeenCalledTimes(1); - expect(mFetchAllPreferenceTopics).toHaveBeenCalledTimes(1); - expect(mFetchAllIdentifiers).toHaveBeenCalledTimes(1); - }); -}); diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts b/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts deleted file mode 100644 index 94fc380a..00000000 --- a/packages/cli/src/commands/consent/upload-preferences/upload/transform/buildPendingUpdates.ts +++ /dev/null @@ -1,141 +0,0 @@ -/** - * Module: transform/buildPendingUpdates - * - * Pure transformation from parsed CSV rows + schema mappings into - * PreferenceUpdateItem payloads, ready for upload. 
- */ -import type { PreferenceUpdateItem } from '@transcend-io/privacy-types'; -import { - getPreferenceIdentifiersFromRow, - getPreferenceMetadataFromRow, - getPreferenceUpdatesFromRow, - NONE_PREFERENCE_MAP, - type ColumnIdentifierMap, - type ColumnMetadataMap, - type ColumnPurposeMap, - type PendingSafePreferenceUpdates, - type PendingWithConflictPreferenceUpdates, - type PreferenceTopic, - type Purpose, -} from '@transcend-io/sdk'; - -import type { FormattedAttribute } from '../../../../../lib/graphql/index.js'; - -export interface BuildPendingParams { - /** Safe updates keyed by user/primaryKey */ - safe: PendingSafePreferenceUpdates; - /** Conflict updates keyed by user/primaryKey (value.row contains row data) */ - conflicts: PendingWithConflictPreferenceUpdates; - /** Only upload safe updates (ignore conflicts entirely) */ - skipConflictUpdates: boolean; - /** Name of the column to use as the preference timestamp (if available) */ - timestampColumn?: string; - /** CSV column -> purpose/preference mapping */ - columnToPurposeName: ColumnPurposeMap; - /** CSV column -> identifier mapping */ - columnToIdentifier: ColumnIdentifierMap; - /** CSV column -> metadata key mapping (optional) */ - columnToMetadata?: ColumnMetadataMap; - /** Full set of preference topics for resolving row → preference values */ - preferenceTopics: PreferenceTopic[]; - /** Full set of purposes for resolving slugs/trackingTypes */ - purposes: Purpose[]; - /** Partition to attribute to every record */ - partition: string; - /** Static attributes injected into workflow settings */ - workflowAttrs: FormattedAttribute[]; - /** If true, downstream should avoid user-visible notifications */ - isSilent: boolean; - /** If true, skip triggering workflows downstream */ - skipWorkflowTriggers: boolean; - /** If true, force trigger workflows even if preferences haven't changed */ - forceTriggerWorkflows: boolean; -} - -/** - * Convert parsed CSV rows into a map of PreferenceUpdateItem payloads. 
- * - * This function is *pure* (no IO, logging or state writes) and therefore easy to test. - * - * @param params - Transformation inputs - * @returns Map of primaryKey -> PreferenceUpdateItem - */ -export function buildPendingUpdates( - params: BuildPendingParams, -): Record { - const { - safe, - conflicts, - skipConflictUpdates, - timestampColumn, - columnToPurposeName, - columnToIdentifier, - columnToMetadata, - preferenceTopics, - purposes, - partition, - workflowAttrs, - isSilent, - skipWorkflowTriggers, - forceTriggerWorkflows, - } = params; - - // If conflicts are to be included, normalize the shape to match `safe` rows. - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const merged: Record = skipConflictUpdates - ? { ...safe } - : { - ...safe, - ...Object.fromEntries(Object.entries(conflicts).map(([id, v]) => [id, v.row])), - }; - - const purposeSlugs = purposes.map((x) => x.trackingType); - const out: Record = {}; - - for (const [userId, row] of Object.entries(merged)) { - // Determine timestamp used for the store - const ts = - timestampColumn === NONE_PREFERENCE_MAP || !timestampColumn - ? new Date() - : new Date(row[timestampColumn]); - - // Resolve purposes/preferences from columns using schema mappings + topics - const updates = getPreferenceUpdatesFromRow({ - row, - columnToPurposeName, - preferenceTopics, - purposeSlugs, - }); - - // Resolve identifiers per row (email, phone, userId, etc.) - const identifiers = getPreferenceIdentifiersFromRow({ - row, - columnToIdentifier, - }); - - // Resolve metadata from mapped columns (if any) - const metadata = columnToMetadata - ? 
getPreferenceMetadataFromRow({ row, columnToMetadata }) - : undefined; - - out[userId] = { - identifiers, - partition, - timestamp: ts.toISOString(), - purposes: Object.entries(updates).map(([purpose, value]) => ({ - ...value, - purpose, - workflowSettings: { - attributes: workflowAttrs, - isSilent, - skipWorkflowTrigger: skipWorkflowTriggers, - forceTriggerWorkflow: forceTriggerWorkflows, - }, - })), - // Only include metadata if there are values - ...(metadata && metadata.length > 0 ? { metadata } : {}), - }; - } - - return out; -} diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/transform/index.ts b/packages/cli/src/commands/consent/upload-preferences/upload/transform/index.ts index a7e17db4..424257e3 100644 --- a/packages/cli/src/commands/consent/upload-preferences/upload/transform/index.ts +++ b/packages/cli/src/commands/consent/upload-preferences/upload/transform/index.ts @@ -1,5 +1 @@ -export { - buildPendingUpdates, - type BuildPendingParams, - type FormattedAttribute, -} from '@transcend-io/sdk'; +export * from './transformCsv.js'; diff --git a/packages/cli/src/commands/consent/upload-preferences/upload/types.ts b/packages/cli/src/commands/consent/upload-preferences/upload/types.ts deleted file mode 100644 index b32ede66..00000000 --- a/packages/cli/src/commands/consent/upload-preferences/upload/types.ts +++ /dev/null @@ -1 +0,0 @@ -export type { PreferenceUploadProgress } from '@transcend-io/sdk'; diff --git a/packages/cli/src/commands/consent/upload-preferences/worker.ts b/packages/cli/src/commands/consent/upload-preferences/worker.ts index d5944e8b..a9946e52 100644 --- a/packages/cli/src/commands/consent/upload-preferences/worker.ts +++ b/packages/cli/src/commands/consent/upload-preferences/worker.ts @@ -1,9 +1,11 @@ import { mkdirSync, createWriteStream } from 'node:fs'; import { join, dirname } from 'node:path'; -import { buildTranscendGraphQLClient, createSombraGotInstance } from '@transcend-io/sdk'; - -import type { ToWorker 
} from '../../../lib/pooling/index.js'; +import { + buildTranscendGraphQLClient, + createSombraGotInstance, + type ToWorker, +} from '@transcend-io/sdk'; import { splitCsvToList } from '@transcend-io/utils'; import { logger } from '../../../logger.js'; import { getFilePrefix } from './artifacts/index.js'; diff --git a/packages/cli/src/lib/pooling/createExtraKeyHandler.ts b/packages/cli/src/lib/pooling/createExtraKeyHandler.ts index b4455eb0..3b5917d7 100644 --- a/packages/cli/src/lib/pooling/createExtraKeyHandler.ts +++ b/packages/cli/src/lib/pooling/createExtraKeyHandler.ts @@ -1,6 +1,6 @@ -import type { ExportStatusMap } from './logRotation.js'; +import type { ExportStatusMap, SlotPaths } from '@transcend-io/sdk'; + import { showCombinedLogs, type LogLocation } from './showCombinedLogs.js'; -import type { SlotPaths } from './spawnWorkerProcess.js'; /** Severity filter applied by the viewer. */ type ViewLevel = 'error' | 'warn' | 'all'; diff --git a/packages/cli/src/lib/pooling/dashboardPlugin.ts b/packages/cli/src/lib/pooling/dashboardPlugin.ts index 46b49269..b7c9ef62 100644 --- a/packages/cli/src/lib/pooling/dashboardPlugin.ts +++ b/packages/cli/src/lib/pooling/dashboardPlugin.ts @@ -4,7 +4,7 @@ import * as readline from 'node:readline'; import type { ObjByString } from '@transcend-io/type-utils'; import colors from 'colors'; -import type { SlotState } from './types.js'; +import type { SlotState } from '@transcend-io/sdk'; /** * A dashboard plugin defines how to render the worker pool UI. diff --git a/packages/cli/src/lib/pooling/ensureLogFile.ts b/packages/cli/src/lib/pooling/ensureLogFile.ts deleted file mode 100644 index e00a9167..00000000 --- a/packages/cli/src/lib/pooling/ensureLogFile.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { closeSync, existsSync, openSync } from 'node:fs'; - -/** - * Ensure a log file exists (touch). 
- * - * @param pathStr - the path to the log file - */ -export function ensureLogFile(pathStr: string): void { - if (!existsSync(pathStr)) { - const fd = openSync(pathStr, 'a'); - closeSync(fd); - } -} diff --git a/packages/cli/src/lib/pooling/exportCombinedLogs.ts b/packages/cli/src/lib/pooling/exportCombinedLogs.ts index 886513db..6ad35262 100644 --- a/packages/cli/src/lib/pooling/exportCombinedLogs.ts +++ b/packages/cli/src/lib/pooling/exportCombinedLogs.ts @@ -3,7 +3,7 @@ import { once } from 'node:events'; import { createReadStream, createWriteStream, mkdirSync, statSync } from 'node:fs'; import { basename, join } from 'node:path'; -import type { SlotPaths, WorkerLogPaths } from './spawnWorkerProcess.js'; +import type { SlotPaths, WorkerLogPaths } from '@transcend-io/sdk'; /** Which combined log to export */ export type LogKind = 'error' | 'warn' | 'info' | 'all'; diff --git a/packages/cli/src/lib/pooling/index.ts b/packages/cli/src/lib/pooling/index.ts index 14c9116f..1622ab67 100644 --- a/packages/cli/src/lib/pooling/index.ts +++ b/packages/cli/src/lib/pooling/index.ts @@ -1,20 +1,13 @@ export * from './computePoolSize.js'; export * from './openTerminal.js'; -export * from './ensureLogFile.js'; -export * from './spawnWorkerProcess.js'; export * from './showCombinedLogs.js'; export * from './installInteractiveSwitcher.js'; export * from './exportCombinedLogs.js'; -export * from './logRotation.js'; export * from './osc8Link.js'; export * from './keymap.js'; export * from './replayFileTailToStdout.js'; export * from './workerIds.js'; export * from './os.js'; -export * from './spawnWorkerProcess.js'; export * from './dashboardPlugin.js'; -export * from './safeGetLogPathsForSlot.js'; export * from './uiPlugins.js'; -export * from './types.js'; -export * from './runPool.js'; export * from './createExtraKeyHandler.js'; diff --git a/packages/cli/src/lib/pooling/installInteractiveSwitcher.ts b/packages/cli/src/lib/pooling/installInteractiveSwitcher.ts index 
b32922ec..a52db164 100644 --- a/packages/cli/src/lib/pooling/installInteractiveSwitcher.ts +++ b/packages/cli/src/lib/pooling/installInteractiveSwitcher.ts @@ -5,7 +5,7 @@ import { DEBUG } from '../../constants.js'; import { keymap } from './keymap.js'; import { replayFileTailToStdout } from './replayFileTailToStdout.js'; import type { WhichLogs } from './showCombinedLogs.js'; -import type { WorkerLogPaths } from './spawnWorkerProcess.js'; +import type { WorkerLogPaths } from '@transcend-io/sdk'; import { cycleWorkers, getWorkerIds } from './workerIds.js'; /** diff --git a/packages/cli/src/lib/pooling/logRotation.ts b/packages/cli/src/lib/pooling/logRotation.ts deleted file mode 100644 index f6a56b3e..00000000 --- a/packages/cli/src/lib/pooling/logRotation.ts +++ /dev/null @@ -1,260 +0,0 @@ -// logRotation.ts -import { readdirSync, writeFileSync, existsSync, unlinkSync, mkdirSync } from 'node:fs'; -import { join } from 'node:path'; - -import colors from 'colors'; - -/** - * Reset worker logs in the given directory. - * mode: - * - "truncate": empty files but keep them (best if tails are open) - * - "delete": remove files entirely (simplest if no tails yet) - * - * @param dir - Directory to reset logs in - * @param mode - 'truncate' or 'delete' - */ -function resetWorkerLogs(dir: string, mode: 'truncate' | 'delete'): void { - const patterns = [ - /worker-\d+\.log$/, - /worker-\d+\.out\.log$/, - /worker-\d+\.err\.log$/, - /worker-\d+\.warn\.log$/, - /worker-\d+\.info\.log$/, - ]; - for (const name of readdirSync(dir)) { - // eslint-disable-next-line no-continue - if (!patterns.some((rx) => rx.test(name))) continue; - const p = join(dir, name); - try { - if (mode === 'delete' && existsSync(p)) unlinkSync(p); - else writeFileSync(p, ''); - } catch { - /* ignore */ - } - } - process.stdout.write( - colors.dim(`Logs have been ${mode === 'delete' ? 'deleted' : 'truncated'} in ${dir}\n`), - ); -} - -/** - * Very robust classification of a single log line into warn/error. 
- * Returns 'warn' | 'error' | null (null = not a level we care to badge). - * - * @param line - Single line of log output to classify - * @returns 'warn' | 'error' | null - */ -export function classifyLogLevel(line: string): 'warn' | 'error' | null { - // Strip common ANSI sequences - // eslint-disable-next-line no-control-regex - const s = line.replace(/\x1B\[[0-9;]*m/g, ''); - - // 1) Explicit worker tag: "[w12] WARN ..." or "[w2] ERROR ..." - const mTag = /\[w\d+\]\s+(ERROR|WARN)\b/i.exec(s); - if (mTag) return mTag[1].toLowerCase() as 'warn' | 'error'; - - // 2) Common plain prefixes - if (/^\s*(ERROR|ERR|FATAL)\b/i.test(s)) return 'error'; - if (/^\s*(WARN|WARNING)\b/.test(s)) return 'warn'; - - // Node runtime warnings - if (/^\s*\(node:\d+\)\s*Warning:/i.test(s)) return 'warn'; - if (/^\s*DeprecationWarning:/i.test(s)) return 'warn'; - - // 3) JSON logs (pino/bunyan/etc.) - // Try to parse as JSON and inspect `level` - try { - const j = JSON.parse(s); - const lv = j?.level; - if (typeof lv === 'number') { - // pino levels: 40=warn, 50=error, 60=fatal - if (lv >= 50) return 'error'; - if (lv >= 40) return 'warn'; - } else if (typeof lv === 'string') { - const L = lv.toLowerCase(); - if (L === 'error' || L === 'fatal') return 'error'; - if (L === 'warn' || L === 'warning') return 'warn'; - } - } catch { - // not JSON, ignore - } - - // 4) Fallthrough: look for level words inside worker-tagged lines - // e.g. "[w3] something WARNING xyz" - const mInline = /\[w\d+\].*\b(WARN|WARNING|ERROR|FATAL)\b/i.exec(s); - if (mInline) { - const L = mInline[1].toUpperCase(); - return L === 'ERROR' || L === 'FATAL' ? 
'error' : 'warn'; - } - - return null; -} - -/** - * Stream splitter to get whole lines from 'data' events - * - * @param onLine - Callback to call with each complete line - * @returns A function that processes a chunk of data and calls onLine for each complete line - */ -export function makeLineSplitter(onLine: (line: string) => void): (chunk: Buffer | string) => void { - let buf = ''; - return (chunk: Buffer | string) => { - buf += chunk.toString('utf8'); - let nl: number; - // eslint-disable-next-line no-cond-assign - while ((nl = buf.indexOf('\n')) !== -1) { - const line = buf.slice(0, nl); - onLine(line); - buf = buf.slice(nl + 1); - } - }; -} -/** - * Checks if a log line contains an error indicator. - * - * @param t - The log line to check - * @returns True if the line contains an error keyword, false otherwise - */ -export function isLogError(t: string): boolean { - return /\b(ERROR|uncaughtException|unhandledRejection)\b/i.test(t); -} - -/** - * Checks if a log line contains a warning indicator. - * - * @param t - The log line to check - * @returns True if the line contains a warning keyword, false otherwise - */ -export function isLogWarn(t: string): boolean { - return /\b(WARN|WARNING)\b/i.test(t); -} - -/** - * Determines if a log line is a new header (error, warning, worker tag, or ISO timestamp). - * - * @param t - The log line to check - * @returns True if the line is a new header, false otherwise - */ -export function isLogNewHeader(t: string): boolean { - return ( - isLogError(t) || - isLogWarn(t) || - /^\s*\[w\d+\]/.test(t) || - /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/.test(t) - ); -} - -// eslint-disable-next-line no-control-regex -const stripAnsi = (s: string): string => s.replace(/\x1B\[[0-9;]*m/g, ''); - -/** - * Extracts blocks of text from a larger body of text. 
- * - * @param text - The text to extract blocks from - * @param starts - A function that determines if a line starts a new block - * @returns An array of extracted blocks - */ -export function extractBlocks(text: string, starts: (cleanLine: string) => boolean): string[] { - if (!text) return []; - const out: string[] = []; - const lines = text.split('\n'); - let buf: string[] = []; - let inBlock = false; - - const flush = (): void => { - if (buf.length) out.push(buf.join('\n')); - buf = []; - inBlock = false; - }; - - for (const raw of lines) { - const clean = stripAnsi(raw || ''); - const headery = isLogNewHeader(clean); - if (!inBlock) { - if (starts(clean)) { - inBlock = true; - buf.push(raw); - } - // eslint-disable-next-line no-continue - continue; - } - if (!raw || headery) { - flush(); - if (starts(clean)) { - inBlock = true; - buf.push(raw); - } - } else { - buf.push(raw); - } - } - flush(); - return out.filter(Boolean); -} - -/** - * The kind of export artifact to retrieve the path for. - */ -export type LogExportKind = 'error' | 'warn' | 'info' | 'all'; - -/** - * Ensure log directory exists - * - * @param rootDir - Root directory - * @returns log dir - */ -export function initLogDir(rootDir: string): string { - const logDir = join(rootDir, 'logs'); - mkdirSync(logDir, { recursive: true }); - - // FIXME - const RESET_MODE = (process.env.RESET_LOGS as 'truncate' | 'delete') ?? 'truncate'; - resetWorkerLogs(logDir, RESET_MODE); - - return logDir; -} - -export interface ExportArtifactResult { - /** Whether the artifact was opened successfully */ - ok?: boolean; - /** The absolute path to the export artifact */ - path: string; - /** Time saved */ - savedAt?: number; - /** If exported */ - exported?: boolean; -} - -/** - * Status map for export artifacts. 
- */ -export type ExportStatusMap = { - /** The absolute paths to the error log artifacts */ - error?: ExportArtifactResult; - /** The absolute paths to the warn log artifacts */ - warn?: ExportArtifactResult; - /** The absolute paths to the info log artifacts */ - info?: ExportArtifactResult; - /** The absolute paths to all log artifacts */ - all?: ExportArtifactResult; - /** The absolute paths to the failures CSV artifacts */ - failuresCsv?: ExportArtifactResult; -}; - -/** - * Return export statuses - * - * FIXME what is this for? - * - * @param receiptsFolder - Receipts directory - * @returns Export map - */ -export function buildExportStatus(receiptsFolder: string): ExportStatusMap { - return { - error: { path: join(receiptsFolder, 'combined-errors.log') }, - warn: { path: join(receiptsFolder, 'combined-warns.log') }, - info: { path: join(receiptsFolder, 'combined-info.log') }, - all: { path: join(receiptsFolder, 'combined-all.log') }, - failuresCsv: { path: join(receiptsFolder, 'failing-updates.csv') }, - }; -} diff --git a/packages/cli/src/lib/pooling/runPool.ts b/packages/cli/src/lib/pooling/runPool.ts deleted file mode 100644 index 2fa8ed24..00000000 --- a/packages/cli/src/lib/pooling/runPool.ts +++ /dev/null @@ -1,641 +0,0 @@ -import type { ChildProcess } from 'node:child_process'; - -import type { ObjByString } from '@transcend-io/type-utils'; -import { RateCounter } from '@transcend-io/utils'; -/* eslint-disable max-lines */ -import colors from 'colors'; - -import { installInteractiveSwitcher } from './installInteractiveSwitcher.js'; -import { classifyLogLevel, initLogDir, makeLineSplitter } from './logRotation.js'; -import { safeGetLogPathsForSlot } from './safeGetLogPathsForSlot.js'; -import { - getWorkerLogPaths, - isIpcOpen, - safeSend, - spawnWorkerProcess, - type WorkerLogPaths, -} from './spawnWorkerProcess.js'; -import type { SlotState, FromWorker, ToWorker } from './types.js'; - -/** - * Callbacks used by the generic pool orchestrator to: - * - 
fetch tasks, - * - format labels for UI, - * - fold progress and results into aggregate totals, - * - run optional post-processing once the pool completes. - * - * Each command supplies concrete `TTask`, `TProg`, `TRes`, and optionally a - * custom totals type `TTotals`. - */ -export interface PoolHooks< - TTask extends ObjByString, - TProg extends ObjByString, - TRes extends ObjByString, - TTotals = unknown, -> { - /** - * Produce the next work item for a slot. - * - * @returns The next task or `undefined` if no tasks remain. - */ - nextTask: () => TTask | undefined; - - /** - * Human-readable label for a task, shown in dashboards. - * - * @param t - The task to label. - * @returns A short descriptor, typically a file path or identifier. - */ - taskLabel: (t: TTask) => string; - - /** - * Fold an incoming progress payload into aggregate totals. - * Should be pure (no side effects) and return the new totals object. - * - * @param prevTotals - The previous totals value. - * @param prog - The latest progress payload from a worker. - * @returns Updated totals. - */ - onProgress: (prevTotals: TTotals, prog: TProg) => TTotals; - - /** - * Handle a final result from a worker. - * Should be pure and return the new totals plus a boolean indicating if the - * unit succeeded (used to set per-slot level/metrics). - * - * @param prevTotals - The previous totals value. - * @param res - The result payload from a worker. - * @returns Object containing updated totals and success flag. - */ - onResult: ( - prevTotals: TTotals, - res: TRes, - ) => { - /** Updated totals after processing this result */ - totals: TTotals; - /** Whether the task was successful */ - ok: boolean; - }; - - /** - * Initialize per-slot progress state when a task is assigned. - * Useful when you want a non-undefined `progress` immediately. - * - * @param t - The task to be started in this slot. - * @returns Initial progress state or `undefined`. 
- */ - initSlotProgress?: (t: TTask) => TProg | undefined; - - /** - * Produce the initial totals value for the pool (defaults to `{}`). - * - * @returns A new totals object. - */ - initTotals?: () => TTotals; - - /** - * Provide an export status map for dashboards (optional). - * - * @returns A status object or `undefined` if not applicable. - */ - exportStatus?: () => Record | undefined; - - /** - * Optional post-processing step invoked after the pool finishes. - * Common use: writing combined logs/artifacts once all workers complete. - * - * When {@link RunPoolOptions.viewerMode} is enabled, the runner also passes - * the **log directory** and the **per-slot log file paths** so you can - * replicate the legacy “viewer mode” auto-exports (combined logs, indices, etc.). - */ - postProcess?: (ctx: { - /** Live snapshot of all worker slots at completion. */ - slots: Map>; - /** Final aggregate totals. */ - totals: TTotals; - /** Absolute path to the pool’s log directory. */ - logDir: string; - /** - * Mapping of slot id -> log paths (stdout/stderr/current, rotations may exist). - * Use this to collect and export artifacts after completion. - */ - logsBySlot: Map; - /** Unix millis when the pool started (first worker spawned). */ - startedAt: number; - /** Unix millis when the pool fully completed (after last worker exit). */ - finishedAt: number; - /** - * Helper to safely re-fetch a slot’s current log paths, accounting for respawns. - * Mirrors the dashboard’s attach/switcher behavior. - */ - getLogPathsForSlot: (id: number) => WorkerLogPaths | undefined; - /** True if the pool was run in viewerMode (non-interactive). */ - viewerMode: boolean; - }) => Promise | void; -} - -/** - * Options to run a generic worker pool. - * - * @template TTask - The payload sent to each worker as a "task". - * @template TProg - The progress payload emitted by workers. - * @template TRes - The result payload emitted by workers. 
- * @template TTotals - The aggregate totals object maintained by hooks. - */ -export interface RunPoolOptions< - TTask extends ObjByString, - TProg extends ObjByString, - TRes extends ObjByString, - TTotals extends ObjByString, -> { - /** Human-readable name for the pool, shown in headers (e.g., "Parallel uploader", "Chunk CSV"). */ - title: string; - - /** - * Directory for pool-local state (logs, discovery messages, artifacts). - * Usually the CLI's working directory for the command. - */ - baseDir: string; - - /** Absolute path of the module the child should execute (the command impl that calls runChild when CHILD_FLAG is present). */ - childModulePath: string; - - /** - * Number of worker processes to spawn. Typically derived via a helper like `computePoolSize`. - */ - poolSize: number; - - /** Logical CPU count used for display only (not required to equal `poolSize`). */ - cpuCount: number; - - /** - * Flag that the child module expects to see in `process.argv` to run in "worker" mode. - * This MUST match the flag the worker module checks (e.g., `--as-child`). - */ - childFlag: string; - - /** - * Renderer function injected by the command. The runner calls this on each "tick" - * and on significant state changes (progress, completion, attach/detach). - */ - render: (input: { - /** Header/title for the UI. */ - title: string; - /** Configured pool size (number of workers). */ - poolSize: number; - /** CPU count for informational display. */ - cpuCount: number; - /** Total number of files/tasks anticipated by the command. */ - filesTotal: number; - /** Number of files/tasks that have produced a successful result so far. */ - filesCompleted: number; - /** Number of files/tasks that have produced a failed result so far. */ - filesFailed: number; - /** - * Per-slot state for each worker, including busy flag, file label, start time, - * last log level badge, and optional progress payload. 
- */ - workerState: Map>; - /** - * Arbitrary totals object maintained by hooks. This is the primary place to surface - * domain-specific aggregate metrics in the UI. - */ - totals: TTotals; - /** - * Smoothed throughput metrics computed by the runner: - * - successSoFar: convenience mirror of completed count for the renderer - * - r10s: moving average of completions per second over ~10 seconds - * - r60s: moving average of completions per second over ~60 seconds - */ - throughput: { - /** Convenience mirror of `filesCompleted` for renderers that expect it in this block. */ - successSoFar: number; - /** Moving average file completions/sec (10s window). */ - r10s: number; - /** Moving average file completions/sec (60s window). */ - r60s: number; - /** Moving average job/record completions/sec (10s window). */ - jobsR10s: number; - /** Moving average job/record completions/sec (60s window). */ - jobsR60s: number; - }; - /** True when the pool has fully drained and all workers have exited. */ - final: boolean; - /** - * Optional export status payload surfaced by hooks; used by commands that generate - * multiple artifact files and want to show "latest paths" in the UI. - */ - exportStatus?: Record; - }) => void; - - /** - * Hook suite that adapts the pool to a specific command: - * - nextTask(): TTask | undefined - * - taskLabel(task): string - * - initTotals?(): TTotals - * - initSlotProgress?(task): TProg - * - onProgress(totals, prog): TTotals - * - onResult(totals, res): { totals: TTotals; ok: boolean } - * - postProcess?({ slots, totals, logDir, logsBySlot, ... }): Promise | void - * - exportStatus?(): Record - */ - hooks: PoolHooks; - - /** - * Total number of "files" or logical items the command expects to process. - * Used purely for UI/ETA; does not affect scheduling. - */ - filesTotal: number; - - /** Open worker logs in new terminals (macOS). Default true unless viewerMode=true. */ - openLogWindows?: boolean; - - /** Silence worker stdio (except logs). 
*/ - isSilent?: boolean; - - /** - * When true, run in “viewer mode” (non-interactive): - * - Do NOT install the interactive attach/switcher. - * - Default `openLogWindows` to false. - * - Still render on a timer. - * - Provide `logDir`/`logsBySlot` to `postProcess` for auto-exports. - */ - viewerMode?: boolean; - - /** - * Optional factory for additional key bindings (e.g., log viewers/exports). - * Only used when viewerMode === false. - */ - extraKeyHandler?: (args: { - /** per-slot log paths (kept up-to-date across respawns) */ - logsBySlot: Map; - /** re-render dashboard now */ - repaint: () => void; - /** pause/unpause dashboard repaint while showing viewers */ - setPaused: (p: boolean) => void; - }) => (buf: Buffer) => void; -} - -/** - * Run a multi-process worker pool for a command. - * The runner owns: spawning workers, assigning tasks, collecting progress/results, - * basic log badging (WARN/ERROR), an interactive attach/switcher (unless viewerMode), - * and a render loop. - * - * The command injects "hooks" to customize scheduling and totals aggregation. - * - * @param opts - Options - */ -export async function runPool< - TTask extends ObjByString, - TProg extends ObjByString, - TRes extends ObjByString, - TTotals extends ObjByString, ->(opts: RunPoolOptions): Promise { - const { - title, - baseDir, - poolSize, - cpuCount, - render, - childModulePath, - hooks, - filesTotal, - childFlag, - viewerMode = false, - } = opts; - - // Default behaviors may change under viewerMode. - const openLogWindows = opts.openLogWindows ?? !viewerMode; - const isSilent = opts.isSilent ?? true; - - const startedAt = Date.now(); - const logDir = initLogDir(baseDir); - - /** Live worker processes keyed by slot id. */ - const workers = new Map(); - /** Per-slot state tracked for the UI and scheduling. */ - const workerState = new Map>(); - /** File paths for each worker’s stdout/stderr logs. */ - const slotLogs = new Map(); - /** File-completion throughput meter. 
*/ - const meter = new RateCounter(); - /** Job/record-level throughput meter (fed from progress.processed deltas). */ - const jobMeter = new RateCounter(); - /** Last-seen `processed` count per worker slot, used to compute deltas. */ - const lastProcessed = new Map(); - const totalsInit = (hooks.initTotals?.() ?? {}) as TTotals; - - let totalsBox = totalsInit; - let activeWorkers = 0; - let completed = 0; - let failed = 0; - - // Repaint ticker starts on first READY to avoid double-first-render. - let ticker: NodeJS.Timeout | null = null; - let firstReady = false; - // Gate repaint during popup viewers/exports (driven by extraKeyHandler). - let paused = false; - // Keep a reference so we can unbind on exit. - let extraHandler: ((buf: Buffer) => void) | null = null; - - /** - * Paint the UI. The renderer is intentionally pure and receives - * a snapshot of current state. - * - * @param final - If true, render the final state and exit. - */ - const repaint = (final = false): void => { - if (paused) return; - render({ - title, - poolSize, - cpuCount, - filesTotal, - filesCompleted: completed, - filesFailed: failed, - workerState, - totals: totalsBox, - final, - exportStatus: hooks.exportStatus?.(), - throughput: { - successSoFar: completed, - r10s: meter.rate(10_000), - r60s: meter.rate(60_000), - jobsR10s: jobMeter.rate(10_000), - jobsR60s: jobMeter.rate(60_000), - }, - }); - }; - - /** - * Assign the next task to `id` if available. - * - * @param id - The worker slot id to assign a task to. - * @returns true if a task was assigned. - * - * NOTE: This is the critical fix. We **do not** "peek & put back" a task. - * We only consume via `nextTask()` inside this function. 
- */ - const assign = (id: number): boolean => { - const task = hooks.nextTask(); - if (!task) return false; - - const child = workers.get(id)!; - const label = hooks.taskLabel(task); - const initialProg = hooks.initSlotProgress?.(task); - - workerState.set(id, { - busy: true, - file: label, - startedAt: Date.now(), - lastLevel: 'ok', - progress: initialProg, - }); - - safeSend(child, { type: 'task', payload: task } as ToWorker); - repaint(); - return true; - }; - - /* Spawn workers */ - for (let i = 0; i < poolSize; i += 1) { - const child = spawnWorkerProcess({ - id: i, - modulePath: childModulePath, - logDir, - openLogWindows, - isSilent, - childFlag, - }); - workers.set(i, child); - workerState.set(i, { - busy: false, - file: null, - startedAt: null, - lastLevel: 'ok', - }); - slotLogs.set(i, getWorkerLogPaths(child)); - activeWorkers += 1; - - // badge WARN/ERROR quickly from stderr - const errLine = makeLineSplitter((line) => { - const lvl = classifyLogLevel(line); - if (!lvl) return; - const prev = workerState.get(i)!; - if (prev.lastLevel !== lvl) { - workerState.set(i, { ...prev, lastLevel: lvl }); - repaint(); - } - }); - child.stderr?.on('data', errLine); - - // messages from the worker - // eslint-disable-next-line no-loop-func - child.on('message', (msg: FromWorker) => { - if (!msg || typeof msg !== 'object') return; - - if (msg.type === 'ready') { - if (!firstReady) { - firstReady = true; - ticker = setInterval(() => repaint(false), 350); - } - assign(i); // try to start work immediately - return; - } - - if (msg.type === 'progress') { - totalsBox = hooks.onProgress(totalsBox, msg.payload); - const prev = workerState.get(i)!; - workerState.set(i, { ...prev, progress: msg.payload }); - - // Feed job-level meter from progress.processed deltas - const payload = msg.payload as Record; - if (typeof payload?.processed === 'number') { - const prevCount = lastProcessed.get(i) ?? 
0; - const delta = payload.processed - prevCount; - if (delta > 0) jobMeter.add(delta); - lastProcessed.set(i, payload.processed); - } - - repaint(); - return; - } - - if (msg.type === 'result') { - const prev = workerState.get(i)!; - const { totals: t2, ok } = hooks.onResult(totalsBox, msg.payload); - totalsBox = t2; - - if (ok) { - completed += 1; - meter.add(1); - } else { - failed += 1; - } - - workerState.set(i, { - ...prev, - busy: false, - file: null, - progress: undefined, - lastLevel: ok ? 'ok' : 'error', - }); - lastProcessed.delete(i); - - // Just try to assign; if none left, shut this child down. - if (!assign(i) && isIpcOpen(child)) { - safeSend(child, { type: 'shutdown' } as ToWorker); - } - repaint(); - } - }); - - // eslint-disable-next-line no-loop-func - child.on('exit', () => { - activeWorkers -= 1; - if (activeWorkers === 0) { - if (ticker) clearInterval(ticker); - repaint(true); - } - }); - } - - /* Interactive attach/switcher */ - let cleanupSwitcher: () => void = () => { - /* noop */ - // no-op by default, overridden in non-viewerMode - }; - - const tearDownStdin = (): void => { - try { - process.stdin.setRawMode?.(false); - } catch { - /* noop */ - } - try { - process.stdin.pause(); - } catch { - /* noop */ - } - }; - - const onSigint = (): void => { - if (ticker) clearInterval(ticker); - cleanupSwitcher?.(); - if (extraHandler) { - try { - process.stdin.off('data', extraHandler); - } catch { - /* noop */ - } - } - tearDownStdin(); - - process.stdout.write('\nStopping workers...\n'); - for (const [, w] of workers) { - if (isIpcOpen(w)) safeSend(w, { type: 'shutdown' } as ToWorker); - try { - w?.kill('SIGTERM'); - } catch { - /* noop */ - } - } - process.exit(130); - }; - - const onAttach = (id: number): void => { - paused = true; // stop dashboard repaint while attached/viewing - process.stdout.write('\x1b[2J\x1b[H'); // clear + home - process.stdout.write( - `Attached to worker ${id}. 
(Esc/Ctrl+] detach • Ctrl+D EOF • Ctrl+C SIGINT)\n`, - ); - }; - const onDetach = (): void => { - paused = false; - repaint(); - }; - - process.once('SIGINT', onSigint); - - if (!viewerMode) { - if (process.stdin.isTTY) { - try { - process.stdin.setRawMode(true); - } catch { - process.stdout.write( - colors.yellow('Warning: Unable to enable raw mode for interactive key handling.\n'), - ); - } - process.stdin.resume(); // keep stdin flowing (no encoding — raw Buffer) - } - - cleanupSwitcher = installInteractiveSwitcher({ - workers, - onAttach, - onDetach, - onCtrlC: onSigint, - getLogPaths: (id) => safeGetLogPathsForSlot(id, workers, slotLogs), - replayBytes: 200 * 1024, - replayWhich: ['out', 'err'], - onEnterAttachScreen: onAttach, - }); - - if (opts.extraKeyHandler) { - extraHandler = opts.extraKeyHandler({ - logsBySlot: slotLogs, - repaint: () => repaint(), - setPaused: (p) => { - paused = p; - }, - }); - process.stdin.on('data', extraHandler); - } - } - - /* Wait for full completion, then post-process (with log context if needed). */ - await new Promise((resolve) => { - const check = setInterval(async () => { - if (activeWorkers === 0) { - clearInterval(check); - if (ticker) clearInterval(ticker); - cleanupSwitcher(); - - if (extraHandler) { - try { - process.stdin.off('data', extraHandler); - } catch { - /* noop */ - } - } - tearDownStdin(); - - const finishedAt = Date.now(); - - try { - await hooks.postProcess?.({ - slots: workerState, - totals: totalsBox, - logDir, - logsBySlot: slotLogs, - startedAt, - finishedAt, - viewerMode, - getLogPathsForSlot: (id: number) => safeGetLogPathsForSlot(id, workers, slotLogs), - }); - } catch (err: unknown) { - const msg = - ( - err as { - /** Error stack */ - stack?: string; - } - )?.stack ?? 
String(err); - process.stdout.write(colors.red(`postProcess error: ${msg}\n`)); - } - resolve(); - } - }, 300); - }); -} -/* eslint-enable max-lines */ diff --git a/packages/cli/src/lib/pooling/safeGetLogPathsForSlot.ts b/packages/cli/src/lib/pooling/safeGetLogPathsForSlot.ts deleted file mode 100644 index 221b76be..00000000 --- a/packages/cli/src/lib/pooling/safeGetLogPathsForSlot.ts +++ /dev/null @@ -1,28 +0,0 @@ -import type { ChildProcess } from 'node:child_process'; - -import { getWorkerLogPaths, isIpcOpen, type WorkerLogPaths } from './spawnWorkerProcess.js'; - -/** - * Safely retrieve log paths for a worker slot. - * - * @param id - The worker slot ID - * @param workers - Map of worker IDs to their ChildProcess instances - * @param slotLogPaths - Map of worker IDs to their log paths - * @returns The log paths for the worker slot, or undefined if not available - */ -export function safeGetLogPathsForSlot( - id: number, - workers: Map, - slotLogPaths: Map, -): WorkerLogPaths | undefined { - const live = workers.get(id); - if (isIpcOpen(live)) { - try { - const p = getWorkerLogPaths(live!); - if (p !== undefined && p !== null) return p; - } catch { - /* fall back */ - } - } - return slotLogPaths.get(id); -} diff --git a/packages/cli/src/lib/pooling/showCombinedLogs.ts b/packages/cli/src/lib/pooling/showCombinedLogs.ts index e5c59e73..80376a99 100644 --- a/packages/cli/src/lib/pooling/showCombinedLogs.ts +++ b/packages/cli/src/lib/pooling/showCombinedLogs.ts @@ -1,7 +1,7 @@ /* eslint-disable no-continue, no-control-regex */ import { readFileSync } from 'node:fs'; -import type { WorkerLogPaths } from './spawnWorkerProcess.js'; +import type { WorkerLogPaths } from '@transcend-io/sdk'; /** * Log locations diff --git a/packages/cli/src/lib/pooling/spawnWorkerProcess.ts b/packages/cli/src/lib/pooling/spawnWorkerProcess.ts deleted file mode 100644 index 07aae79d..00000000 --- a/packages/cli/src/lib/pooling/spawnWorkerProcess.ts +++ /dev/null @@ -1,218 +0,0 @@ -import 
{ fork, type ChildProcess } from 'node:child_process'; -import { createWriteStream } from 'node:fs'; -import { join } from 'node:path'; - -import { ensureLogFile } from './ensureLogFile.js'; -import { classifyLogLevel, makeLineSplitter } from './logRotation.js'; -import { openLogTailWindowMulti } from './openTerminal.js'; - -/** Default child-flag used if a caller doesn’t provide one. */ -export const CHILD_FLAG = '--as-child'; - -// Symbol key so we can stash/retrieve paths on the child proc safely -const LOG_PATHS_SYM: unique symbol = Symbol('workerLogPaths'); - -export interface WorkerLogPaths { - /** Structured (app-controlled) log file path written via WORKER_LOG */ - structuredPath: string; - /** Raw stdout capture */ - outPath: string; - /** Raw stderr capture */ - errPath: string; - /** Lines classified as INFO (primarily stdout) */ - infoPath: string; - /** Lines classified as WARN (from stderr without error tokens) */ - warnPath: string; - /** Lines classified as ERROR (from stderr, including uncaught) */ - errorPath: string; -} - -/** Convenience alias for the optional return from getWorkerLogPaths */ -export type SlotPaths = Map; - -/** - * Retrieve the paths we stashed on the child. - * - * @param child - The worker ChildProcess instance. - * @returns The log paths or undefined if not set. - */ -export function getWorkerLogPaths(child: ChildProcess): WorkerLogPaths | undefined { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - return (child as any)[LOG_PATHS_SYM] as WorkerLogPaths | undefined; -} - -/** - * Is IPC channel still open? (Node doesn't type `.channel`) - * - * @param w - The worker ChildProcess instance. - * @returns True if the IPC channel is open, false otherwise. 
- */ -export function isIpcOpen(w: ChildProcess | undefined | null): boolean { - const ch = w && w.channel; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - return !!(w && w.connected && ch && !(ch as any).destroyed); -} - -/** - * Safely send a message to the worker process. - * - * @param w - The worker ChildProcess instance. - * @param msg - The message to send. - * @returns True if the message was sent successfully, false otherwise. - */ -export function safeSend(w: ChildProcess, msg: unknown): boolean { - if (!isIpcOpen(w)) return false; - try { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - w.send?.(msg as any); - return true; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } catch (err: any) { - if (err?.code === 'ERR_IPC_CHANNEL_CLOSED' || err?.code === 'EPIPE' || err?.errno === -32) { - return false; - } - throw err; - } -} - -export interface SpawnWorkerOptions { - /** Worker slot/index */ - id: number; - /** Absolute path to the module to fork (should handle CHILD_FLAG) */ - modulePath: string; - /** Directory where log files will be written */ - logDir: string; - /** If true, open tail windows for the log files */ - openLogWindows: boolean; - /** If true, spawn with silent stdio (respect your existing setting) */ - isSilent: boolean; - /** Optional override for the child flag (defaults to CHILD_FLAG) */ - childFlag?: string; -} - -/** - * Spawn a worker process with piped stdio and persisted logs. - * - * Files produced per worker: - * - worker-{id}.log (structured WORKER_LOG written by the child) - * - worker-{id}.out.log (raw stdout) - * - worker-{id}.err.log (raw stderr) - * - worker-{id}.info.log (classified INFO lines from stdout) - * - worker-{id}.warn.log (classified WARN lines from stderr) - * - worker-{id}.error.log (classified ERROR lines from stderr) - * - * @param opts - Options for spawning the worker process. - * @returns The spawned ChildProcess instance. 
- */ -export function spawnWorkerProcess(opts: SpawnWorkerOptions): ChildProcess { - const { id, modulePath, logDir, openLogWindows, isSilent, childFlag = CHILD_FLAG } = opts; - - const structuredPath = join(logDir, `worker-${id}.log`); - const outPath = join(logDir, `worker-${id}.out.log`); - const errPath = join(logDir, `worker-${id}.err.log`); - const infoPath = join(logDir, `worker-${id}.info.log`); - const warnPath = join(logDir, `worker-${id}.warn.log`); - const errorPath = join(logDir, `worker-${id}.error.log`); - - [structuredPath, outPath, errPath, infoPath, warnPath, errorPath].forEach(ensureLogFile); - - const child = fork(modulePath, [childFlag], { - stdio: ['pipe', 'pipe', 'pipe', 'ipc'], - env: { ...process.env, WORKER_ID: String(id), WORKER_LOG: structuredPath }, - execArgv: process.execArgv, - silent: isSilent, - }); - - // Raw capture streams - const outStream = createWriteStream(outPath, { flags: 'a' }); - const errStream = createWriteStream(errPath, { flags: 'a' }); - - // Classified streams - const infoStream = createWriteStream(infoPath, { flags: 'a' }); - const warnStream = createWriteStream(warnPath, { flags: 'a' }); - const errorStream = createWriteStream(errorPath, { flags: 'a' }); - - // Pipe raw streams - child.stdout?.pipe(outStream); - child.stderr?.pipe(errStream); - - // Headers so tail windows show something immediately - const hdr = (name: string): string => - `[parent] ${name} capture active for w${id} (pid ${child.pid})\n`; - outStream.write(hdr('stdout')); - errStream.write(hdr('stderr')); - infoStream.write(hdr('info')); - warnStream.write(hdr('warn')); - errorStream.write(hdr('error')); - - // Classified INFO from stdout (line-buffered) - if (child.stdout) { - const onOutLine = makeLineSplitter((line) => { - if (!line) return; - try { - // Treat all stdout lines as INFO for the classified stream - infoStream.write(`${line}\n`); - } catch { - /* ignore */ - } - }); - child.stdout.on('data', onOutLine); - } - - // Classified 
WARN/ERROR from stderr (line-buffered) - if (child.stderr) { - const onErrLine = makeLineSplitter((line) => { - if (!line) return; - const lvl = classifyLogLevel(line); // 'warn' | 'error' | null - try { - if (lvl === 'error') { - errorStream.write(`${line}\n`); - } else { - // Treat untagged stderr as WARN by default (common in libs) - warnStream.write(`${line}\n`); - } - } catch { - /* ignore */ - } - }); - child.stderr.on('data', onErrLine); - } - - // Stash log path metadata on the child - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (child as any)[LOG_PATHS_SYM] = { - structuredPath, - outPath, - errPath, - infoPath, - warnPath, - errorPath, - } as WorkerLogPaths; - - if (openLogWindows) { - openLogTailWindowMulti( - [structuredPath, outPath, errPath, infoPath, warnPath, errorPath], - `worker-${id}`, - isSilent, - ); - } - - // Best-effort error suppression on file streams - outStream.on('error', () => { - /* ignore */ - }); - errStream.on('error', () => { - /* ignore */ - }); - infoStream.on('error', () => { - /* ignore */ - }); - warnStream.on('error', () => { - /* ignore */ - }); - errorStream.on('error', () => { - /* ignore */ - }); - - return child; -} diff --git a/packages/cli/src/lib/pooling/tests/buildExportStatus.test.ts b/packages/cli/src/lib/pooling/tests/buildExportStatus.test.ts index 7a42fd3f..0c46cbff 100644 --- a/packages/cli/src/lib/pooling/tests/buildExportStatus.test.ts +++ b/packages/cli/src/lib/pooling/tests/buildExportStatus.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest'; -import { buildExportStatus } from '../logRotation.js'; +import { buildExportStatus } from '@transcend-io/sdk'; describe('buildExportStatus', () => { it('returns expected paths for all export artifacts', () => { diff --git a/packages/cli/src/lib/pooling/tests/classifyLogLevel.test.ts b/packages/cli/src/lib/pooling/tests/classifyLogLevel.test.ts index cd496ca5..5dc6087c 100644 --- 
a/packages/cli/src/lib/pooling/tests/classifyLogLevel.test.ts +++ b/packages/cli/src/lib/pooling/tests/classifyLogLevel.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest'; -import { classifyLogLevel } from '../logRotation.js'; +import { classifyLogLevel } from '@transcend-io/sdk'; describe('classifyLogLevel', () => { it('detects explicit worker tags', () => { diff --git a/packages/cli/src/lib/pooling/tests/createExtraKeyHandler.test.ts b/packages/cli/src/lib/pooling/tests/createExtraKeyHandler.test.ts index eab5beb3..55f532be 100644 --- a/packages/cli/src/lib/pooling/tests/createExtraKeyHandler.test.ts +++ b/packages/cli/src/lib/pooling/tests/createExtraKeyHandler.test.ts @@ -1,8 +1,7 @@ /* eslint-disable max-lines */ import { describe, it, expect, vi, beforeEach, afterEach, afterAll } from 'vitest'; -import type { ExportStatusMap } from '../logRotation.js'; -import type { SlotPaths } from '../spawnWorkerProcess.js'; +import type { ExportStatusMap, SlotPaths } from '@transcend-io/sdk'; /** * Mock the combined logs viewer. We assert calls and control resolution/rejection. diff --git a/packages/cli/src/lib/pooling/tests/ensureLogFile.test.ts b/packages/cli/src/lib/pooling/tests/ensureLogFile.test.ts index 31eb50f4..9aef86fc 100644 --- a/packages/cli/src/lib/pooling/tests/ensureLogFile.test.ts +++ b/packages/cli/src/lib/pooling/tests/ensureLogFile.test.ts @@ -2,7 +2,7 @@ import { existsSync, openSync, closeSync } from 'node:fs'; import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { ensureLogFile } from '../ensureLogFile.js'; +import { ensureLogFile } from '@transcend-io/sdk'; /** * Mock fs BEFORE importing the SUT. 
diff --git a/packages/cli/src/lib/pooling/tests/extractBlocks.test.ts b/packages/cli/src/lib/pooling/tests/extractBlocks.test.ts index 57090de3..026df55b 100644 --- a/packages/cli/src/lib/pooling/tests/extractBlocks.test.ts +++ b/packages/cli/src/lib/pooling/tests/extractBlocks.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest'; -import { extractBlocks } from '../logRotation.js'; +import { extractBlocks } from '@transcend-io/sdk'; /** * Blocks should start when `starts(cleanLine)` returns true, diff --git a/packages/cli/src/lib/pooling/tests/getWorkerLogPaths.test.ts b/packages/cli/src/lib/pooling/tests/getWorkerLogPaths.test.ts index fce21dee..975cba31 100644 --- a/packages/cli/src/lib/pooling/tests/getWorkerLogPaths.test.ts +++ b/packages/cli/src/lib/pooling/tests/getWorkerLogPaths.test.ts @@ -3,9 +3,9 @@ import { createWriteStream, type WriteStream } from 'node:fs'; import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { ensureLogFile } from '../ensureLogFile.js'; +import { ensureLogFile, getWorkerLogPaths, spawnWorkerProcess } from '@transcend-io/sdk'; + import { openLogTailWindowMulti } from '../openTerminal.js'; -import { getWorkerLogPaths, spawnWorkerProcess } from '../spawnWorkerProcess.js'; /** * Mock deps before SUT import (inline factories). 
@@ -19,14 +19,14 @@ vi.mock('node:fs', () => ({ vi.mock('../openTerminal.js', () => ({ openLogTailWindowMulti: vi.fn(), })); -vi.mock('../ensureLogFile.js', () => ({ - ensureLogFile: vi.fn(), -})); -vi.mock('../logRotation.js', () => { +vi.mock('@transcend-io/sdk', async () => { + const actual = await vi.importActual('@transcend-io/sdk'); const makeLineSplitter = vi.fn( (cb: (line: string) => void) => (chunk: unknown) => cb(String(chunk)), ); return { + ...actual, + ensureLogFile: vi.fn(), classifyLogLevel: vi.fn(), makeLineSplitter, }; @@ -113,7 +113,6 @@ describe('getWorkerLogPaths', () => { id: 7, modulePath: '/mod.js', logDir: '/logs', - openLogWindows: false, isSilent: true, }); diff --git a/packages/cli/src/lib/pooling/tests/initLogDir.test.ts b/packages/cli/src/lib/pooling/tests/initLogDir.test.ts index 26903b43..56770e1f 100644 --- a/packages/cli/src/lib/pooling/tests/initLogDir.test.ts +++ b/packages/cli/src/lib/pooling/tests/initLogDir.test.ts @@ -2,7 +2,7 @@ import { readdirSync, writeFileSync, existsSync, unlinkSync, mkdirSync } from 'n import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import { initLogDir } from '../logRotation.js'; +import { initLogDir } from '@transcend-io/sdk'; /** * Mock colors BEFORE importing the SUT. 
diff --git a/packages/cli/src/lib/pooling/tests/installInteractiveSwitcher.test.ts b/packages/cli/src/lib/pooling/tests/installInteractiveSwitcher.test.ts index 13295e6a..024db428 100644 --- a/packages/cli/src/lib/pooling/tests/installInteractiveSwitcher.test.ts +++ b/packages/cli/src/lib/pooling/tests/installInteractiveSwitcher.test.ts @@ -7,7 +7,7 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; import { installInteractiveSwitcher } from '../installInteractiveSwitcher.js'; import { keymap } from '../keymap.js'; import { replayFileTailToStdout } from '../replayFileTailToStdout.js'; -import type { WorkerLogPaths } from '../spawnWorkerProcess.js'; +import type { WorkerLogPaths } from '@transcend-io/sdk'; import { getWorkerIds, cycleWorkers } from '../workerIds.js'; /** diff --git a/packages/cli/src/lib/pooling/tests/isIpcOpen.test.ts b/packages/cli/src/lib/pooling/tests/isIpcOpen.test.ts index 7e56ac79..2d506b36 100644 --- a/packages/cli/src/lib/pooling/tests/isIpcOpen.test.ts +++ b/packages/cli/src/lib/pooling/tests/isIpcOpen.test.ts @@ -2,7 +2,7 @@ import type { ChildProcess } from 'node:child_process'; import { describe, it, expect } from 'vitest'; -import { isIpcOpen } from '../spawnWorkerProcess.js'; +import { isIpcOpen } from '@transcend-io/sdk'; /** * Build a minimal ChildProcess-like object for isIpcOpen tests. 
diff --git a/packages/cli/src/lib/pooling/tests/isLogError.test.ts b/packages/cli/src/lib/pooling/tests/isLogError.test.ts index 78d24ddd..1b6d4b86 100644 --- a/packages/cli/src/lib/pooling/tests/isLogError.test.ts +++ b/packages/cli/src/lib/pooling/tests/isLogError.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest'; -import { isLogError } from '../logRotation.js'; +import { isLogError } from '@transcend-io/sdk'; describe('isLogError', () => { it('matches ERROR and runtime fatal indicators', () => { diff --git a/packages/cli/src/lib/pooling/tests/isLogNewHeader.test.ts b/packages/cli/src/lib/pooling/tests/isLogNewHeader.test.ts index b5c9d2ef..e52b9330 100644 --- a/packages/cli/src/lib/pooling/tests/isLogNewHeader.test.ts +++ b/packages/cli/src/lib/pooling/tests/isLogNewHeader.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest'; -import { isLogNewHeader } from '../logRotation.js'; +import { isLogNewHeader } from '@transcend-io/sdk'; describe('isLogNewHeader', () => { it('is true for errors, warnings, worker tags, and ISO timestamps', () => { diff --git a/packages/cli/src/lib/pooling/tests/isLogWarn.test.ts b/packages/cli/src/lib/pooling/tests/isLogWarn.test.ts index be271bfa..3f170601 100644 --- a/packages/cli/src/lib/pooling/tests/isLogWarn.test.ts +++ b/packages/cli/src/lib/pooling/tests/isLogWarn.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest'; -import { isLogWarn } from '../logRotation.js'; +import { isLogWarn } from '@transcend-io/sdk'; describe('isLogWarn', () => { it('matches WARN/WARNING case-insensitively', () => { diff --git a/packages/cli/src/lib/pooling/tests/makeLineSplitter.test.ts b/packages/cli/src/lib/pooling/tests/makeLineSplitter.test.ts index b538e7d2..8d796c8e 100644 --- a/packages/cli/src/lib/pooling/tests/makeLineSplitter.test.ts +++ b/packages/cli/src/lib/pooling/tests/makeLineSplitter.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest'; -import { makeLineSplitter } 
from '../logRotation.js'; +import { makeLineSplitter } from '@transcend-io/sdk'; describe('makeLineSplitter', () => { it('emits one line per newline across chunk boundaries', () => { diff --git a/packages/cli/src/lib/pooling/tests/runPool.test.ts b/packages/cli/src/lib/pooling/tests/runPool.test.ts index f80b5b6e..d428e6b1 100644 --- a/packages/cli/src/lib/pooling/tests/runPool.test.ts +++ b/packages/cli/src/lib/pooling/tests/runPool.test.ts @@ -3,7 +3,7 @@ import { EventEmitter } from 'node:events'; import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; /* SUT */ -import { runPool } from '../runPool.js'; +import { runPool } from '@transcend-io/sdk'; /* colors → identity */ vi.mock('colors', () => ({ @@ -38,20 +38,26 @@ const mGetWorkerLogPaths = vi.fn().mockReturnValue({ }); const mSpawnWorkerProcess = vi.fn(); -vi.mock('../spawnWorkerProcess.js', () => ({ - safeSend: (...a: unknown[]) => mSafeSend(...a), - isIpcOpen: (...a: unknown[]) => mIsIpcOpen(...a), - getWorkerLogPaths: (...a: unknown[]) => mGetWorkerLogPaths(...a), - spawnWorkerProcess: (...a: unknown[]) => mSpawnWorkerProcess(...a), -})); - -/* logRotation bits the runner uses */ -vi.mock('../logRotation.js', () => ({ - initLogDir: vi.fn(() => '/exp'), // avoid real FS - makeLineSplitter: (fn: (line: string) => void) => (buf: unknown) => fn(String(buf)), - classifyLogLevel: (line: string) => - /ERROR/.test(line) ? 'error' : /WARN/.test(line) ? 
'warn' : undefined, -})); +vi.mock('@transcend-io/sdk', async () => { + const actual = await vi.importActual('@transcend-io/sdk'); + return { + ...actual, + safeSend: (...a: unknown[]) => mSafeSend(...a), + isIpcOpen: (...a: unknown[]) => mIsIpcOpen(...a), + getWorkerLogPaths: (...a: unknown[]) => mGetWorkerLogPaths(...a), + spawnWorkerProcess: (...a: unknown[]) => mSpawnWorkerProcess(...a), + initLogDir: vi.fn(() => '/exp'), + makeLineSplitter: (fn: (line: string) => void) => (buf: unknown) => fn(String(buf)), + classifyLogLevel: (line: string) => + /ERROR/.test(line) ? 'error' : /WARN/.test(line) ? 'warn' : undefined, + safeGetLogPathsForSlot: vi.fn(() => ({ + out: '/tmp/out.log', + err: '/tmp/err.log', + structured: '/tmp/structured.log', + current: '/tmp/current.log', + })), + }; +}); /* interactive switcher → cleanup fn */ const mInstallInteractiveSwitcher = vi.fn().mockReturnValue(() => { @@ -61,16 +67,6 @@ vi.mock('../installInteractiveSwitcher.js', () => ({ installInteractiveSwitcher: (...a: unknown[]) => mInstallInteractiveSwitcher(...a), })); -/* safeGetLogPathsForSlot — simple stub */ -vi.mock('../safeGetLogPathsForSlot.js', () => ({ - safeGetLogPathsForSlot: vi.fn(() => ({ - out: '/tmp/out.log', - err: '/tmp/err.log', - structured: '/tmp/structured.log', - current: '/tmp/current.log', - })), -})); - /* ─── Test utilities ─────────────────────────────────────────────────────── */ /** Fake child process used by the pool. 
*/ @@ -173,7 +169,7 @@ describe('runPool', () => { cpuCount: 8, filesTotal: 1, childFlag: '--as-child', - hooks: hooks as unknown as import('../runPool.js').PoolHooks, + hooks: hooks as unknown as import('@transcend-io/sdk').PoolHooks, render: (snap) => { renders.push({ title: snap.title, @@ -277,7 +273,7 @@ describe('runPool', () => { cpuCount: 4, filesTotal: 1, childFlag: '--as-child', - hooks: hooks as unknown as import('../runPool.js').PoolHooks, + hooks: hooks as unknown as import('@transcend-io/sdk').PoolHooks, viewerMode: true, render: (snap) => flags.push(snap.final), }); diff --git a/packages/cli/src/lib/pooling/tests/safeGetLogPathsForSlot.test.ts b/packages/cli/src/lib/pooling/tests/safeGetLogPathsForSlot.test.ts index 62594818..708fed1a 100644 --- a/packages/cli/src/lib/pooling/tests/safeGetLogPathsForSlot.test.ts +++ b/packages/cli/src/lib/pooling/tests/safeGetLogPathsForSlot.test.ts @@ -2,8 +2,12 @@ import type { ChildProcess } from 'node:child_process'; import { describe, it, expect, vi } from 'vitest'; -import { safeGetLogPathsForSlot } from '../safeGetLogPathsForSlot.js'; -import { isIpcOpen, getWorkerLogPaths, type WorkerLogPaths } from '../spawnWorkerProcess.js'; +import { + safeGetLogPathsForSlot, + isIpcOpen, + getWorkerLogPaths, + type WorkerLogPaths, +} from '@transcend-io/sdk'; /** * Mock collaborators BEFORE importing the SUT. @@ -13,10 +17,14 @@ import { isIpcOpen, getWorkerLogPaths, type WorkerLogPaths } from '../spawnWorke * Since the SUT imports from './spawnWorkerProcess', and this test lives in ../tests, * the correct mock specifier here is '../spawnWorkerProcess'. 
*/ -vi.mock('../spawnWorkerProcess.js', () => ({ - isIpcOpen: vi.fn(), - getWorkerLogPaths: vi.fn(), -})); +vi.mock('@transcend-io/sdk', async () => { + const actual = await vi.importActual('@transcend-io/sdk'); + return { + ...actual, + isIpcOpen: vi.fn(), + getWorkerLogPaths: vi.fn(), + }; +}); const mockedIsOpen = vi.mocked(isIpcOpen); const mockedGetPaths = vi.mocked(getWorkerLogPaths); diff --git a/packages/cli/src/lib/pooling/tests/safeSend.test.ts b/packages/cli/src/lib/pooling/tests/safeSend.test.ts index 1370872d..5a77d530 100644 --- a/packages/cli/src/lib/pooling/tests/safeSend.test.ts +++ b/packages/cli/src/lib/pooling/tests/safeSend.test.ts @@ -2,7 +2,7 @@ import type { ChildProcess } from 'node:child_process'; import { describe, it, expect, vi } from 'vitest'; -import { safeSend } from '../spawnWorkerProcess.js'; +import { safeSend } from '@transcend-io/sdk'; /** * Make a ChildProcess-shaped object. diff --git a/packages/cli/src/lib/pooling/tests/showCombinedLogs.test.ts b/packages/cli/src/lib/pooling/tests/showCombinedLogs.test.ts index 442ea2d2..c0dd9c9d 100644 --- a/packages/cli/src/lib/pooling/tests/showCombinedLogs.test.ts +++ b/packages/cli/src/lib/pooling/tests/showCombinedLogs.test.ts @@ -3,7 +3,7 @@ import { readFileSync } from 'node:fs'; import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { showCombinedLogs, type WhichLogs } from '../showCombinedLogs.js'; -import type { WorkerLogPaths } from '../spawnWorkerProcess.js'; +import type { WorkerLogPaths } from '@transcend-io/sdk'; /** * Mock fs BEFORE importing the SUT. 
diff --git a/packages/cli/src/lib/pooling/tests/spawnWorkerProcess.test.ts b/packages/cli/src/lib/pooling/tests/spawnWorkerProcess.test.ts index 48f7c539..d13c8aaf 100644 --- a/packages/cli/src/lib/pooling/tests/spawnWorkerProcess.test.ts +++ b/packages/cli/src/lib/pooling/tests/spawnWorkerProcess.test.ts @@ -3,10 +3,14 @@ import { createWriteStream, type WriteStream } from 'node:fs'; import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { ensureLogFile } from '../ensureLogFile.js'; -import { classifyLogLevel } from '../logRotation.js'; +import { + ensureLogFile, + classifyLogLevel, + CHILD_FLAG, + spawnWorkerProcess, +} from '@transcend-io/sdk'; + import { openLogTailWindowMulti } from '../openTerminal.js'; -import { CHILD_FLAG, spawnWorkerProcess } from '../spawnWorkerProcess.js'; /** * Inline mocks for all external deps used by the SUT. @@ -20,14 +24,14 @@ vi.mock('node:fs', () => ({ vi.mock('../openTerminal.js', () => ({ openLogTailWindowMulti: vi.fn(), })); -vi.mock('../ensureLogFile.js', () => ({ - ensureLogFile: vi.fn(), -})); -vi.mock('../logRotation.js', () => { +vi.mock('@transcend-io/sdk', async () => { + const actual = await vi.importActual('@transcend-io/sdk'); const makeLineSplitter = vi.fn( (cb: (line: string) => void) => (chunk: unknown) => cb(String(chunk)), ); return { + ...actual, + ensureLogFile: vi.fn(), classifyLogLevel: vi.fn(), makeLineSplitter, }; @@ -111,15 +115,16 @@ describe('spawnWorkerProcess', () => { mCws.mockImplementation(() => sink()); }); - it('ensures log files, forks with proper args, writes headers, and opens tails when enabled', () => { + it('ensures log files, forks with proper args, writes headers, and calls onLogFilesCreated when provided', () => { const child = makeChild(); mFork.mockReturnValue(child as never); + const onLogFilesCreated = vi.fn(); const p = spawnWorkerProcess({ id: 3, modulePath: '/worker.js', logDir: '/tmp/logs', - openLogWindows: true, + onLogFilesCreated, isSilent: false, }); @@ 
-155,9 +160,9 @@ describe('spawnWorkerProcess', () => { ); }); - // openLogTailWindowMulti called with all six paths - expect(mOpen).toHaveBeenCalledTimes(1); - const [paths, name, isSilent] = mOpen.mock.calls[0]!; + // onLogFilesCreated called with all six paths + expect(onLogFilesCreated).toHaveBeenCalledTimes(1); + const [paths, label, isSilent] = onLogFilesCreated.mock.calls[0]!; expect(paths).toEqual([ '/tmp/logs/worker-3.log', '/tmp/logs/worker-3.out.log', @@ -166,7 +171,7 @@ describe('spawnWorkerProcess', () => { '/tmp/logs/worker-3.warn.log', '/tmp/logs/worker-3.error.log', ]); - expect(name).toBe('worker-3'); + expect(label).toBe('worker-3'); expect(isSilent).toBe(false); }); @@ -182,7 +187,6 @@ describe('spawnWorkerProcess', () => { id: 1, modulePath: '/m.js', logDir: '/l', - openLogWindows: false, isSilent: true, }); diff --git a/packages/cli/src/lib/pooling/tests/uiPlugins.test.ts b/packages/cli/src/lib/pooling/tests/uiPlugins.test.ts index 19c570a5..0de9738f 100644 --- a/packages/cli/src/lib/pooling/tests/uiPlugins.test.ts +++ b/packages/cli/src/lib/pooling/tests/uiPlugins.test.ts @@ -2,7 +2,7 @@ import type { ObjByString } from '@transcend-io/type-utils'; import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import type { CommonCtx } from '../dashboardPlugin.js'; -import type { SlotState } from '../types.js'; +import type { SlotState } from '@transcend-io/sdk'; import { fmtNum, pctBar, poolProgress, makeHeader, makeWorkerRows } from '../uiPlugins.js'; /** diff --git a/packages/cli/src/lib/pooling/types.ts b/packages/cli/src/lib/pooling/types.ts deleted file mode 100644 index 2f5fd071..00000000 --- a/packages/cli/src/lib/pooling/types.ts +++ /dev/null @@ -1,63 +0,0 @@ -import type { ObjByString } from '@transcend-io/type-utils'; - -/** Minimal per-slot state the runner keeps */ -export type PoolLevel = 'ok' | 'warn' | 'error'; - -export interface SlotState { - /** True if the worker is currently processing a task */ - busy: boolean; 
- /** The file being processed by the worker */ - file: string | null; - /** Timestamp when the worker started processing the task */ - startedAt: number | null; - /** Current log level of the worker */ - lastLevel: PoolLevel; - /** Progress */ - progress?: TProg; -} - -/** Message sent by a worker indicating it is ready to receive tasks. */ -export type WorkerReady = { - /** Type ready */ - type: 'ready'; -}; - -/** Message sent by a worker with a progress payload. */ -export type WorkerProgress = { - /** Discriminant. */ - type: 'progress'; - /** Implementation-defined progress payload. */ - payload: TProg; -}; - -/** Message sent by a worker with a final result payload for a single unit. */ -export type WorkerResult = { - /** Discriminant. */ - type: 'result'; - /** Implementation-defined result payload. */ - payload: TRes; -}; - -/** Union of all Worker → Parent messages. */ -export type FromWorker = WorkerReady | WorkerProgress | WorkerResult; - -/** - * Message sent by the parent to a worker to signal shutdown. - */ -export type ShutdownEvent = { - /** Shutdown */ - type: 'shutdown'; -}; - -/** - * Message sent by the parent to a worker to assign a task. - */ -export type TaskEvent = { - /** Task */ - type: 'task'; - /** Payload */ - payload: TTask; -}; - -/** Messages the parent can send to a worker. 
*/ -export type ToWorker = ShutdownEvent | TaskEvent; diff --git a/packages/sdk/src/preference-upload/loadReferenceData.test.ts b/packages/sdk/src/preference-upload/loadReferenceData.test.ts new file mode 100644 index 00000000..2265d224 --- /dev/null +++ b/packages/sdk/src/preference-upload/loadReferenceData.test.ts @@ -0,0 +1,65 @@ +import type { GraphQLClient } from 'graphql-request'; +import { describe, it, expect, vi, beforeEach } from 'vitest'; + +import type { Identifier } from '../data-inventory/fetchAllIdentifiers.js'; +import type { PreferenceTopic } from '../preference-management/fetchAllPreferenceTopics.js'; +import type { Purpose } from '../preference-management/fetchAllPurposes.js'; + +const H = vi.hoisted(() => ({ + fetchAllPurposes: vi.fn(), + fetchAllPreferenceTopics: vi.fn(), + fetchAllIdentifiers: vi.fn(), +})); + +vi.mock('../preference-management/fetchAllPurposes.js', () => ({ + fetchAllPurposes: H.fetchAllPurposes, +})); +vi.mock('../preference-management/fetchAllPreferenceTopics.js', () => ({ + fetchAllPreferenceTopics: H.fetchAllPreferenceTopics, +})); +vi.mock('../data-inventory/fetchAllIdentifiers.js', () => ({ + fetchAllIdentifiers: H.fetchAllIdentifiers, +})); + +import { loadReferenceData } from './loadReferenceData.js'; + +describe('loadReferenceData', () => { + let client: GraphQLClient; + + beforeEach(() => { + vi.clearAllMocks(); + client = { + request: vi.fn().mockResolvedValue({}), + } as unknown as GraphQLClient; + }); + + it('loads purposes, topics, and identifiers', async () => { + const purposes = [{ id: 'p1' }, { id: 'p2' }] as Purpose[]; + const preferenceTopics = [{ id: 't1' }] as PreferenceTopic[]; + const identifiers = [{ id: 'i1' }, { id: 'i2' }] as Identifier[]; + + H.fetchAllPurposes.mockResolvedValueOnce(purposes); + H.fetchAllPreferenceTopics.mockResolvedValueOnce(preferenceTopics); + H.fetchAllIdentifiers.mockResolvedValueOnce(identifiers); + + const result = await loadReferenceData(client, { logger: console }); + + 
expect(result.purposes).toEqual(purposes); + expect(result.preferenceTopics).toEqual(preferenceTopics); + expect(result.identifiers).toEqual(identifiers); + + expect(H.fetchAllPurposes).toHaveBeenCalledTimes(1); + expect(H.fetchAllPreferenceTopics).toHaveBeenCalledTimes(1); + expect(H.fetchAllIdentifiers).toHaveBeenCalledTimes(1); + }); + + it('propagates errors (e.g., identifiers fetch fails)', async () => { + const err = new Error('boom'); + + H.fetchAllPurposes.mockResolvedValueOnce([{ id: 'p' }] as Purpose[]); + H.fetchAllPreferenceTopics.mockResolvedValueOnce([{ id: 't' }] as PreferenceTopic[]); + H.fetchAllIdentifiers.mockRejectedValueOnce(err); + + await expect(loadReferenceData(client, { logger: console })).rejects.toBe(err); + }); +}); From 006d414f8e677344532e2b51207a17453e29d6cf Mon Sep 17 00:00:00 2001 From: Michael Farrell Date: Sat, 28 Mar 2026 23:20:58 -0700 Subject: [PATCH 08/10] =?UTF-8?q?Move=20pooling=20from=20SDK=20to=20utils?= =?UTF-8?q?=20=E2=80=94=20generic=20infrastructure,=20not=20API-specific?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move packages/sdk/src/pooling/ to packages/utils/src/pooling/ since runPool, spawnWorkerProcess, logRotation, and types are generic multi-process infrastructure (like chunkOneCsvFile, RateCounter) not Transcend API-specific code. 
- Fix RateCounter import to relative (now within same package) - Add @transcend-io/type-utils dependency to utils package.json - Update utils barrel, remove pooling from SDK barrel - Update ~34 CLI consumer files: pooling imports from @transcend-io/utils Utils typecheck ✓ | Utils build ✓ | SDK typecheck ✓ | CLI typecheck ✓ Made-with: Cursor --- packages/cli/src/commands/admin/chunk-csv/impl.ts | 2 +- .../src/commands/admin/chunk-csv/tests/impl.test.ts | 2 +- packages/cli/src/commands/admin/chunk-csv/worker.ts | 2 +- packages/cli/src/commands/admin/parquet-to-csv/impl.ts | 2 +- .../commands/admin/parquet-to-csv/tests/impl.test.ts | 6 +++--- .../cli/src/commands/admin/parquet-to-csv/worker.ts | 2 +- .../upload-preferences/artifacts/ExportManager.ts | 2 +- .../upload-preferences/artifacts/artifactAbsPath.ts | 2 +- .../artifacts/tests/ExportManager.test.ts | 2 +- .../artifacts/tests/writeExportsIndex.test.ts | 2 +- .../upload-preferences/artifacts/writeExportsIndex.ts | 2 +- .../src/commands/consent/upload-preferences/impl.ts | 2 +- .../src/commands/consent/upload-preferences/worker.ts | 2 +- packages/cli/src/lib/pooling/createExtraKeyHandler.ts | 2 +- packages/cli/src/lib/pooling/dashboardPlugin.ts | 2 +- packages/cli/src/lib/pooling/exportCombinedLogs.ts | 2 +- .../cli/src/lib/pooling/installInteractiveSwitcher.ts | 2 +- packages/cli/src/lib/pooling/showCombinedLogs.ts | 2 +- .../src/lib/pooling/tests/buildExportStatus.test.ts | 2 +- .../cli/src/lib/pooling/tests/classifyLogLevel.test.ts | 2 +- .../lib/pooling/tests/createExtraKeyHandler.test.ts | 2 +- .../cli/src/lib/pooling/tests/ensureLogFile.test.ts | 2 +- .../cli/src/lib/pooling/tests/extractBlocks.test.ts | 2 +- .../src/lib/pooling/tests/getWorkerLogPaths.test.ts | 6 +++--- packages/cli/src/lib/pooling/tests/initLogDir.test.ts | 2 +- .../pooling/tests/installInteractiveSwitcher.test.ts | 2 +- packages/cli/src/lib/pooling/tests/isIpcOpen.test.ts | 2 +- packages/cli/src/lib/pooling/tests/isLogError.test.ts | 2 +- 
.../cli/src/lib/pooling/tests/isLogNewHeader.test.ts | 2 +- packages/cli/src/lib/pooling/tests/isLogWarn.test.ts | 2 +- .../cli/src/lib/pooling/tests/makeLineSplitter.test.ts | 2 +- packages/cli/src/lib/pooling/tests/runPool.test.ts | 10 +++++----- .../lib/pooling/tests/safeGetLogPathsForSlot.test.ts | 6 +++--- packages/cli/src/lib/pooling/tests/safeSend.test.ts | 2 +- .../cli/src/lib/pooling/tests/showCombinedLogs.test.ts | 2 +- .../src/lib/pooling/tests/spawnWorkerProcess.test.ts | 6 +++--- packages/cli/src/lib/pooling/tests/uiPlugins.test.ts | 2 +- packages/sdk/src/index.ts | 1 - packages/utils/package.json | 1 + packages/utils/src/index.ts | 1 + packages/{sdk => utils}/src/pooling/ensureLogFile.ts | 0 packages/{sdk => utils}/src/pooling/index.ts | 0 packages/{sdk => utils}/src/pooling/logRotation.ts | 0 packages/{sdk => utils}/src/pooling/runPool.ts | 2 +- .../src/pooling/safeGetLogPathsForSlot.ts | 0 .../{sdk => utils}/src/pooling/spawnWorkerProcess.ts | 0 packages/{sdk => utils}/src/pooling/types.ts | 0 pnpm-lock.yaml | 3 +++ 48 files changed, 55 insertions(+), 51 deletions(-) rename packages/{sdk => utils}/src/pooling/ensureLogFile.ts (100%) rename packages/{sdk => utils}/src/pooling/index.ts (100%) rename packages/{sdk => utils}/src/pooling/logRotation.ts (100%) rename packages/{sdk => utils}/src/pooling/runPool.ts (99%) rename packages/{sdk => utils}/src/pooling/safeGetLogPathsForSlot.ts (100%) rename packages/{sdk => utils}/src/pooling/spawnWorkerProcess.ts (100%) rename packages/{sdk => utils}/src/pooling/types.ts (100%) diff --git a/packages/cli/src/commands/admin/chunk-csv/impl.ts b/packages/cli/src/commands/admin/chunk-csv/impl.ts index 8ad13c8e..1a48c752 100644 --- a/packages/cli/src/commands/admin/chunk-csv/impl.ts +++ b/packages/cli/src/commands/admin/chunk-csv/impl.ts @@ -3,7 +3,7 @@ import colors from 'colors'; import type { LocalContext } from '../../../context.js'; import { doneInputValidation } from '../../../lib/cli/done-input-validation.js'; 
import { collectCsvFilesOrExit } from '../../../lib/helpers/collectCsvFilesOrExit.js'; -import { CHILD_FLAG, type PoolHooks, runPool } from '@transcend-io/sdk'; +import { CHILD_FLAG, type PoolHooks, runPool } from '@transcend-io/utils'; import { computePoolSize, diff --git a/packages/cli/src/commands/admin/chunk-csv/tests/impl.test.ts b/packages/cli/src/commands/admin/chunk-csv/tests/impl.test.ts index b17afb6b..de4bd9ad 100644 --- a/packages/cli/src/commands/admin/chunk-csv/tests/impl.test.ts +++ b/packages/cli/src/commands/admin/chunk-csv/tests/impl.test.ts @@ -23,7 +23,7 @@ const H = vi.hoisted(() => { poolSize?: number; cpuCount?: number; filesTotal?: number; - hooks?: import('@transcend-io/sdk').PoolHooks< + hooks?: import('@transcend-io/utils').PoolHooks< ChunkTask, ChunkProgress, ChunkResult, diff --git a/packages/cli/src/commands/admin/chunk-csv/worker.ts b/packages/cli/src/commands/admin/chunk-csv/worker.ts index e1fea8e2..ee217d94 100644 --- a/packages/cli/src/commands/admin/chunk-csv/worker.ts +++ b/packages/cli/src/commands/admin/chunk-csv/worker.ts @@ -1,6 +1,6 @@ import { chunkOneCsvFile, extractErrorMessage } from '@transcend-io/utils'; -import type { ToWorker } from '@transcend-io/sdk'; +import type { ToWorker } from '@transcend-io/utils'; import { logger } from '../../../logger.js'; /** diff --git a/packages/cli/src/commands/admin/parquet-to-csv/impl.ts b/packages/cli/src/commands/admin/parquet-to-csv/impl.ts index 219e1209..9241f670 100644 --- a/packages/cli/src/commands/admin/parquet-to-csv/impl.ts +++ b/packages/cli/src/commands/admin/parquet-to-csv/impl.ts @@ -3,7 +3,7 @@ import colors from 'colors'; import type { LocalContext } from '../../../context.js'; import { doneInputValidation } from '../../../lib/cli/done-input-validation.js'; import { collectParquetFilesOrExit } from '../../../lib/helpers/index.js'; -import { CHILD_FLAG, type PoolHooks, runPool } from '@transcend-io/sdk'; +import { CHILD_FLAG, type PoolHooks, runPool } from 
'@transcend-io/utils'; import { computePoolSize, diff --git a/packages/cli/src/commands/admin/parquet-to-csv/tests/impl.test.ts b/packages/cli/src/commands/admin/parquet-to-csv/tests/impl.test.ts index 84aa5317..1ea562ef 100644 --- a/packages/cli/src/commands/admin/parquet-to-csv/tests/impl.test.ts +++ b/packages/cli/src/commands/admin/parquet-to-csv/tests/impl.test.ts @@ -1,7 +1,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import type { LocalContext } from '../../../../context.js'; -import type { PoolHooks } from '@transcend-io/sdk'; +import type { PoolHooks } from '@transcend-io/utils'; import { parquetToCsv, type ParquetToCsvCommandFlags } from '../impl.js'; import { parquetToCsvPlugin } from '../ui/index.js'; import type { ParquetTask, ParquetProgress, ParquetResult } from '../worker.js'; @@ -106,8 +106,8 @@ vi.mock('../../../../lib/helpers/index.js', () => ({ * IMPORTANT: mock the exact module id after resolution. Using the absolute path * to the actual file from *this test file* is reliable for Vitest. 
*/ -vi.mock('@transcend-io/sdk', async () => { - const actual = await vi.importActual('@transcend-io/sdk'); +vi.mock('@transcend-io/utils', async () => { + const actual = await vi.importActual('@transcend-io/utils'); return { ...actual, CHILD_FLAG: H.pooling.CHILD_FLAG, diff --git a/packages/cli/src/commands/admin/parquet-to-csv/worker.ts b/packages/cli/src/commands/admin/parquet-to-csv/worker.ts index da92e32b..f54e9f8d 100644 --- a/packages/cli/src/commands/admin/parquet-to-csv/worker.ts +++ b/packages/cli/src/commands/admin/parquet-to-csv/worker.ts @@ -1,7 +1,7 @@ import { extractErrorMessage } from '@transcend-io/utils'; import { parquetToCsvOneFile } from '../../../lib/helpers/index.js'; -import type { ToWorker } from '@transcend-io/sdk'; +import type { ToWorker } from '@transcend-io/utils'; import { logger } from '../../../logger.js'; export type ParquetTask = { diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/ExportManager.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/ExportManager.ts index c7840080..e40d6475 100644 --- a/packages/cli/src/commands/consent/upload-preferences/artifacts/ExportManager.ts +++ b/packages/cli/src/commands/consent/upload-preferences/artifacts/ExportManager.ts @@ -10,7 +10,7 @@ import { type ExportArtifactResult, type ExportStatusMap, type LogExportKind, -} from '@transcend-io/sdk'; +} from '@transcend-io/utils'; import { copyToClipboard, diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/artifactAbsPath.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/artifactAbsPath.ts index 4602b823..f8077ccc 100644 --- a/packages/cli/src/commands/consent/upload-preferences/artifacts/artifactAbsPath.ts +++ b/packages/cli/src/commands/consent/upload-preferences/artifacts/artifactAbsPath.ts @@ -1,6 +1,6 @@ import { join, resolve } from 'node:path'; -import type { LogExportKind } from '@transcend-io/sdk'; +import type { LogExportKind } from 
'@transcend-io/utils'; export interface ExportArtifactStatus { /** The absolute path to the export artifact */ diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/ExportManager.test.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/ExportManager.test.ts index cfb77473..ead7a751 100644 --- a/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/ExportManager.test.ts +++ b/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/ExportManager.test.ts @@ -43,7 +43,7 @@ vi.mock('../artifactAbsPath.js', () => ({ ), })); -vi.mock('@transcend-io/sdk', () => ({ +vi.mock('@transcend-io/utils', () => ({ extractBlocks: H.fns.extractBlocks, isLogError: vi.fn(() => false), isLogWarn: vi.fn(() => false), diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/writeExportsIndex.test.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/writeExportsIndex.test.ts index 9873a4ac..d46da2bd 100644 --- a/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/writeExportsIndex.test.ts +++ b/packages/cli/src/commands/consent/upload-preferences/artifacts/tests/writeExportsIndex.test.ts @@ -3,7 +3,7 @@ import * as nodeUrl from 'node:url'; import { describe, it, expect, vi, beforeEach } from 'vitest'; -import type { ExportStatusMap } from '@transcend-io/sdk'; +import type { ExportStatusMap } from '@transcend-io/utils'; import { writeExportsIndex } from '../writeExportsIndex.js'; /** diff --git a/packages/cli/src/commands/consent/upload-preferences/artifacts/writeExportsIndex.ts b/packages/cli/src/commands/consent/upload-preferences/artifacts/writeExportsIndex.ts index 1e0b4d75..fb2353b4 100644 --- a/packages/cli/src/commands/consent/upload-preferences/artifacts/writeExportsIndex.ts +++ b/packages/cli/src/commands/consent/upload-preferences/artifacts/writeExportsIndex.ts @@ -3,7 +3,7 @@ import { mkdirSync, writeFileSync } from 'node:fs'; import { join } 
from 'node:path'; import { pathToFileURL } from 'node:url'; -import type { ExportStatusMap } from '@transcend-io/sdk'; +import type { ExportStatusMap } from '@transcend-io/utils'; import { artifactAbsPath, type ExportKindWithCsv } from './artifactAbsPath.js'; let lastIndexFileContents = ''; diff --git a/packages/cli/src/commands/consent/upload-preferences/impl.ts b/packages/cli/src/commands/consent/upload-preferences/impl.ts index f698ca0f..2a9c5ea8 100644 --- a/packages/cli/src/commands/consent/upload-preferences/impl.ts +++ b/packages/cli/src/commands/consent/upload-preferences/impl.ts @@ -12,7 +12,7 @@ import { type PoolHooks, runPool, buildExportStatus, -} from '@transcend-io/sdk'; +} from '@transcend-io/utils'; import { computePoolSize, diff --git a/packages/cli/src/commands/consent/upload-preferences/worker.ts b/packages/cli/src/commands/consent/upload-preferences/worker.ts index a9946e52..72181328 100644 --- a/packages/cli/src/commands/consent/upload-preferences/worker.ts +++ b/packages/cli/src/commands/consent/upload-preferences/worker.ts @@ -4,8 +4,8 @@ import { join, dirname } from 'node:path'; import { buildTranscendGraphQLClient, createSombraGotInstance, - type ToWorker, } from '@transcend-io/sdk'; +import type { ToWorker } from '@transcend-io/utils'; import { splitCsvToList } from '@transcend-io/utils'; import { logger } from '../../../logger.js'; import { getFilePrefix } from './artifacts/index.js'; diff --git a/packages/cli/src/lib/pooling/createExtraKeyHandler.ts b/packages/cli/src/lib/pooling/createExtraKeyHandler.ts index 3b5917d7..2b90f5cc 100644 --- a/packages/cli/src/lib/pooling/createExtraKeyHandler.ts +++ b/packages/cli/src/lib/pooling/createExtraKeyHandler.ts @@ -1,4 +1,4 @@ -import type { ExportStatusMap, SlotPaths } from '@transcend-io/sdk'; +import type { ExportStatusMap, SlotPaths } from '@transcend-io/utils'; import { showCombinedLogs, type LogLocation } from './showCombinedLogs.js'; diff --git 
a/packages/cli/src/lib/pooling/dashboardPlugin.ts b/packages/cli/src/lib/pooling/dashboardPlugin.ts index b7c9ef62..038ca1cb 100644 --- a/packages/cli/src/lib/pooling/dashboardPlugin.ts +++ b/packages/cli/src/lib/pooling/dashboardPlugin.ts @@ -4,7 +4,7 @@ import * as readline from 'node:readline'; import type { ObjByString } from '@transcend-io/type-utils'; import colors from 'colors'; -import type { SlotState } from '@transcend-io/sdk'; +import type { SlotState } from '@transcend-io/utils'; /** * A dashboard plugin defines how to render the worker pool UI. diff --git a/packages/cli/src/lib/pooling/exportCombinedLogs.ts b/packages/cli/src/lib/pooling/exportCombinedLogs.ts index 6ad35262..cbeb355a 100644 --- a/packages/cli/src/lib/pooling/exportCombinedLogs.ts +++ b/packages/cli/src/lib/pooling/exportCombinedLogs.ts @@ -3,7 +3,7 @@ import { once } from 'node:events'; import { createReadStream, createWriteStream, mkdirSync, statSync } from 'node:fs'; import { basename, join } from 'node:path'; -import type { SlotPaths, WorkerLogPaths } from '@transcend-io/sdk'; +import type { SlotPaths, WorkerLogPaths } from '@transcend-io/utils'; /** Which combined log to export */ export type LogKind = 'error' | 'warn' | 'info' | 'all'; diff --git a/packages/cli/src/lib/pooling/installInteractiveSwitcher.ts b/packages/cli/src/lib/pooling/installInteractiveSwitcher.ts index a52db164..0c335713 100644 --- a/packages/cli/src/lib/pooling/installInteractiveSwitcher.ts +++ b/packages/cli/src/lib/pooling/installInteractiveSwitcher.ts @@ -5,7 +5,7 @@ import { DEBUG } from '../../constants.js'; import { keymap } from './keymap.js'; import { replayFileTailToStdout } from './replayFileTailToStdout.js'; import type { WhichLogs } from './showCombinedLogs.js'; -import type { WorkerLogPaths } from '@transcend-io/sdk'; +import type { WorkerLogPaths } from '@transcend-io/utils'; import { cycleWorkers, getWorkerIds } from './workerIds.js'; /** diff --git 
a/packages/cli/src/lib/pooling/showCombinedLogs.ts b/packages/cli/src/lib/pooling/showCombinedLogs.ts index 80376a99..8a963c6e 100644 --- a/packages/cli/src/lib/pooling/showCombinedLogs.ts +++ b/packages/cli/src/lib/pooling/showCombinedLogs.ts @@ -1,7 +1,7 @@ /* eslint-disable no-continue, no-control-regex */ import { readFileSync } from 'node:fs'; -import type { WorkerLogPaths } from '@transcend-io/sdk'; +import type { WorkerLogPaths } from '@transcend-io/utils'; /** * Log locations diff --git a/packages/cli/src/lib/pooling/tests/buildExportStatus.test.ts b/packages/cli/src/lib/pooling/tests/buildExportStatus.test.ts index 0c46cbff..8347bf93 100644 --- a/packages/cli/src/lib/pooling/tests/buildExportStatus.test.ts +++ b/packages/cli/src/lib/pooling/tests/buildExportStatus.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest'; -import { buildExportStatus } from '@transcend-io/sdk'; +import { buildExportStatus } from '@transcend-io/utils'; describe('buildExportStatus', () => { it('returns expected paths for all export artifacts', () => { diff --git a/packages/cli/src/lib/pooling/tests/classifyLogLevel.test.ts b/packages/cli/src/lib/pooling/tests/classifyLogLevel.test.ts index 5dc6087c..b7e9ddec 100644 --- a/packages/cli/src/lib/pooling/tests/classifyLogLevel.test.ts +++ b/packages/cli/src/lib/pooling/tests/classifyLogLevel.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest'; -import { classifyLogLevel } from '@transcend-io/sdk'; +import { classifyLogLevel } from '@transcend-io/utils'; describe('classifyLogLevel', () => { it('detects explicit worker tags', () => { diff --git a/packages/cli/src/lib/pooling/tests/createExtraKeyHandler.test.ts b/packages/cli/src/lib/pooling/tests/createExtraKeyHandler.test.ts index 55f532be..ae5c5341 100644 --- a/packages/cli/src/lib/pooling/tests/createExtraKeyHandler.test.ts +++ b/packages/cli/src/lib/pooling/tests/createExtraKeyHandler.test.ts @@ -1,7 +1,7 @@ /* eslint-disable max-lines */ import { 
describe, it, expect, vi, beforeEach, afterEach, afterAll } from 'vitest'; -import type { ExportStatusMap, SlotPaths } from '@transcend-io/sdk'; +import type { ExportStatusMap, SlotPaths } from '@transcend-io/utils'; /** * Mock the combined logs viewer. We assert calls and control resolution/rejection. diff --git a/packages/cli/src/lib/pooling/tests/ensureLogFile.test.ts b/packages/cli/src/lib/pooling/tests/ensureLogFile.test.ts index 9aef86fc..fdd8a712 100644 --- a/packages/cli/src/lib/pooling/tests/ensureLogFile.test.ts +++ b/packages/cli/src/lib/pooling/tests/ensureLogFile.test.ts @@ -2,7 +2,7 @@ import { existsSync, openSync, closeSync } from 'node:fs'; import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { ensureLogFile } from '@transcend-io/sdk'; +import { ensureLogFile } from '@transcend-io/utils'; /** * Mock fs BEFORE importing the SUT. diff --git a/packages/cli/src/lib/pooling/tests/extractBlocks.test.ts b/packages/cli/src/lib/pooling/tests/extractBlocks.test.ts index 026df55b..23d1fdbd 100644 --- a/packages/cli/src/lib/pooling/tests/extractBlocks.test.ts +++ b/packages/cli/src/lib/pooling/tests/extractBlocks.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest'; -import { extractBlocks } from '@transcend-io/sdk'; +import { extractBlocks } from '@transcend-io/utils'; /** * Blocks should start when `starts(cleanLine)` returns true, diff --git a/packages/cli/src/lib/pooling/tests/getWorkerLogPaths.test.ts b/packages/cli/src/lib/pooling/tests/getWorkerLogPaths.test.ts index 975cba31..032ad85b 100644 --- a/packages/cli/src/lib/pooling/tests/getWorkerLogPaths.test.ts +++ b/packages/cli/src/lib/pooling/tests/getWorkerLogPaths.test.ts @@ -3,7 +3,7 @@ import { createWriteStream, type WriteStream } from 'node:fs'; import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { ensureLogFile, getWorkerLogPaths, spawnWorkerProcess } from '@transcend-io/sdk'; +import { ensureLogFile, getWorkerLogPaths, spawnWorkerProcess } 
from '@transcend-io/utils'; import { openLogTailWindowMulti } from '../openTerminal.js'; @@ -19,8 +19,8 @@ vi.mock('node:fs', () => ({ vi.mock('../openTerminal.js', () => ({ openLogTailWindowMulti: vi.fn(), })); -vi.mock('@transcend-io/sdk', async () => { - const actual = await vi.importActual('@transcend-io/sdk'); +vi.mock('@transcend-io/utils', async () => { + const actual = await vi.importActual('@transcend-io/utils'); const makeLineSplitter = vi.fn( (cb: (line: string) => void) => (chunk: unknown) => cb(String(chunk)), ); diff --git a/packages/cli/src/lib/pooling/tests/initLogDir.test.ts b/packages/cli/src/lib/pooling/tests/initLogDir.test.ts index 56770e1f..f6e1e0fe 100644 --- a/packages/cli/src/lib/pooling/tests/initLogDir.test.ts +++ b/packages/cli/src/lib/pooling/tests/initLogDir.test.ts @@ -2,7 +2,7 @@ import { readdirSync, writeFileSync, existsSync, unlinkSync, mkdirSync } from 'n import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import { initLogDir } from '@transcend-io/sdk'; +import { initLogDir } from '@transcend-io/utils'; /** * Mock colors BEFORE importing the SUT. 
diff --git a/packages/cli/src/lib/pooling/tests/installInteractiveSwitcher.test.ts b/packages/cli/src/lib/pooling/tests/installInteractiveSwitcher.test.ts index 024db428..81f09132 100644 --- a/packages/cli/src/lib/pooling/tests/installInteractiveSwitcher.test.ts +++ b/packages/cli/src/lib/pooling/tests/installInteractiveSwitcher.test.ts @@ -7,7 +7,7 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; import { installInteractiveSwitcher } from '../installInteractiveSwitcher.js'; import { keymap } from '../keymap.js'; import { replayFileTailToStdout } from '../replayFileTailToStdout.js'; -import type { WorkerLogPaths } from '@transcend-io/sdk'; +import type { WorkerLogPaths } from '@transcend-io/utils'; import { getWorkerIds, cycleWorkers } from '../workerIds.js'; /** diff --git a/packages/cli/src/lib/pooling/tests/isIpcOpen.test.ts b/packages/cli/src/lib/pooling/tests/isIpcOpen.test.ts index 2d506b36..1d8b2197 100644 --- a/packages/cli/src/lib/pooling/tests/isIpcOpen.test.ts +++ b/packages/cli/src/lib/pooling/tests/isIpcOpen.test.ts @@ -2,7 +2,7 @@ import type { ChildProcess } from 'node:child_process'; import { describe, it, expect } from 'vitest'; -import { isIpcOpen } from '@transcend-io/sdk'; +import { isIpcOpen } from '@transcend-io/utils'; /** * Build a minimal ChildProcess-like object for isIpcOpen tests. 
diff --git a/packages/cli/src/lib/pooling/tests/isLogError.test.ts b/packages/cli/src/lib/pooling/tests/isLogError.test.ts index 1b6d4b86..03467ce9 100644 --- a/packages/cli/src/lib/pooling/tests/isLogError.test.ts +++ b/packages/cli/src/lib/pooling/tests/isLogError.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest'; -import { isLogError } from '@transcend-io/sdk'; +import { isLogError } from '@transcend-io/utils'; describe('isLogError', () => { it('matches ERROR and runtime fatal indicators', () => { diff --git a/packages/cli/src/lib/pooling/tests/isLogNewHeader.test.ts b/packages/cli/src/lib/pooling/tests/isLogNewHeader.test.ts index e52b9330..0b76a076 100644 --- a/packages/cli/src/lib/pooling/tests/isLogNewHeader.test.ts +++ b/packages/cli/src/lib/pooling/tests/isLogNewHeader.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest'; -import { isLogNewHeader } from '@transcend-io/sdk'; +import { isLogNewHeader } from '@transcend-io/utils'; describe('isLogNewHeader', () => { it('is true for errors, warnings, worker tags, and ISO timestamps', () => { diff --git a/packages/cli/src/lib/pooling/tests/isLogWarn.test.ts b/packages/cli/src/lib/pooling/tests/isLogWarn.test.ts index 3f170601..3de213c5 100644 --- a/packages/cli/src/lib/pooling/tests/isLogWarn.test.ts +++ b/packages/cli/src/lib/pooling/tests/isLogWarn.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest'; -import { isLogWarn } from '@transcend-io/sdk'; +import { isLogWarn } from '@transcend-io/utils'; describe('isLogWarn', () => { it('matches WARN/WARNING case-insensitively', () => { diff --git a/packages/cli/src/lib/pooling/tests/makeLineSplitter.test.ts b/packages/cli/src/lib/pooling/tests/makeLineSplitter.test.ts index 8d796c8e..450c1adc 100644 --- a/packages/cli/src/lib/pooling/tests/makeLineSplitter.test.ts +++ b/packages/cli/src/lib/pooling/tests/makeLineSplitter.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest'; -import { 
makeLineSplitter } from '@transcend-io/sdk'; +import { makeLineSplitter } from '@transcend-io/utils'; describe('makeLineSplitter', () => { it('emits one line per newline across chunk boundaries', () => { diff --git a/packages/cli/src/lib/pooling/tests/runPool.test.ts b/packages/cli/src/lib/pooling/tests/runPool.test.ts index d428e6b1..dd520d29 100644 --- a/packages/cli/src/lib/pooling/tests/runPool.test.ts +++ b/packages/cli/src/lib/pooling/tests/runPool.test.ts @@ -3,7 +3,7 @@ import { EventEmitter } from 'node:events'; import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; /* SUT */ -import { runPool } from '@transcend-io/sdk'; +import { runPool } from '@transcend-io/utils'; /* colors → identity */ vi.mock('colors', () => ({ @@ -38,8 +38,8 @@ const mGetWorkerLogPaths = vi.fn().mockReturnValue({ }); const mSpawnWorkerProcess = vi.fn(); -vi.mock('@transcend-io/sdk', async () => { - const actual = await vi.importActual('@transcend-io/sdk'); +vi.mock('@transcend-io/utils', async () => { + const actual = await vi.importActual('@transcend-io/utils'); return { ...actual, safeSend: (...a: unknown[]) => mSafeSend(...a), @@ -169,7 +169,7 @@ describe('runPool', () => { cpuCount: 8, filesTotal: 1, childFlag: '--as-child', - hooks: hooks as unknown as import('@transcend-io/sdk').PoolHooks, + hooks: hooks as unknown as import('@transcend-io/utils').PoolHooks, render: (snap) => { renders.push({ title: snap.title, @@ -273,7 +273,7 @@ describe('runPool', () => { cpuCount: 4, filesTotal: 1, childFlag: '--as-child', - hooks: hooks as unknown as import('@transcend-io/sdk').PoolHooks, + hooks: hooks as unknown as import('@transcend-io/utils').PoolHooks, viewerMode: true, render: (snap) => flags.push(snap.final), }); diff --git a/packages/cli/src/lib/pooling/tests/safeGetLogPathsForSlot.test.ts b/packages/cli/src/lib/pooling/tests/safeGetLogPathsForSlot.test.ts index 708fed1a..8b762907 100644 --- a/packages/cli/src/lib/pooling/tests/safeGetLogPathsForSlot.test.ts 
+++ b/packages/cli/src/lib/pooling/tests/safeGetLogPathsForSlot.test.ts @@ -7,7 +7,7 @@ import { isIpcOpen, getWorkerLogPaths, type WorkerLogPaths, -} from '@transcend-io/sdk'; +} from '@transcend-io/utils'; /** * Mock collaborators BEFORE importing the SUT. @@ -17,8 +17,8 @@ import { * Since the SUT imports from './spawnWorkerProcess', and this test lives in ../tests, * the correct mock specifier here is '../spawnWorkerProcess'. */ -vi.mock('@transcend-io/sdk', async () => { - const actual = await vi.importActual('@transcend-io/sdk'); +vi.mock('@transcend-io/utils', async () => { + const actual = await vi.importActual('@transcend-io/utils'); return { ...actual, isIpcOpen: vi.fn(), diff --git a/packages/cli/src/lib/pooling/tests/safeSend.test.ts b/packages/cli/src/lib/pooling/tests/safeSend.test.ts index 5a77d530..7c0f522f 100644 --- a/packages/cli/src/lib/pooling/tests/safeSend.test.ts +++ b/packages/cli/src/lib/pooling/tests/safeSend.test.ts @@ -2,7 +2,7 @@ import type { ChildProcess } from 'node:child_process'; import { describe, it, expect, vi } from 'vitest'; -import { safeSend } from '@transcend-io/sdk'; +import { safeSend } from '@transcend-io/utils'; /** * Make a ChildProcess-shaped object. diff --git a/packages/cli/src/lib/pooling/tests/showCombinedLogs.test.ts b/packages/cli/src/lib/pooling/tests/showCombinedLogs.test.ts index c0dd9c9d..70b041fb 100644 --- a/packages/cli/src/lib/pooling/tests/showCombinedLogs.test.ts +++ b/packages/cli/src/lib/pooling/tests/showCombinedLogs.test.ts @@ -3,7 +3,7 @@ import { readFileSync } from 'node:fs'; import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { showCombinedLogs, type WhichLogs } from '../showCombinedLogs.js'; -import type { WorkerLogPaths } from '@transcend-io/sdk'; +import type { WorkerLogPaths } from '@transcend-io/utils'; /** * Mock fs BEFORE importing the SUT. 
diff --git a/packages/cli/src/lib/pooling/tests/spawnWorkerProcess.test.ts b/packages/cli/src/lib/pooling/tests/spawnWorkerProcess.test.ts index d13c8aaf..6c06affe 100644 --- a/packages/cli/src/lib/pooling/tests/spawnWorkerProcess.test.ts +++ b/packages/cli/src/lib/pooling/tests/spawnWorkerProcess.test.ts @@ -8,7 +8,7 @@ import { classifyLogLevel, CHILD_FLAG, spawnWorkerProcess, -} from '@transcend-io/sdk'; +} from '@transcend-io/utils'; import { openLogTailWindowMulti } from '../openTerminal.js'; @@ -24,8 +24,8 @@ vi.mock('node:fs', () => ({ vi.mock('../openTerminal.js', () => ({ openLogTailWindowMulti: vi.fn(), })); -vi.mock('@transcend-io/sdk', async () => { - const actual = await vi.importActual('@transcend-io/sdk'); +vi.mock('@transcend-io/utils', async () => { + const actual = await vi.importActual('@transcend-io/utils'); const makeLineSplitter = vi.fn( (cb: (line: string) => void) => (chunk: unknown) => cb(String(chunk)), ); diff --git a/packages/cli/src/lib/pooling/tests/uiPlugins.test.ts b/packages/cli/src/lib/pooling/tests/uiPlugins.test.ts index 0de9738f..4b3e65e1 100644 --- a/packages/cli/src/lib/pooling/tests/uiPlugins.test.ts +++ b/packages/cli/src/lib/pooling/tests/uiPlugins.test.ts @@ -2,7 +2,7 @@ import type { ObjByString } from '@transcend-io/type-utils'; import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import type { CommonCtx } from '../dashboardPlugin.js'; -import type { SlotState } from '@transcend-io/sdk'; +import type { SlotState } from '@transcend-io/utils'; import { fmtNum, pctBar, poolProgress, makeHeader, makeWorkerRows } from '../uiPlugins.js'; /** diff --git a/packages/sdk/src/index.ts b/packages/sdk/src/index.ts index fd222430..e8ded619 100644 --- a/packages/sdk/src/index.ts +++ b/packages/sdk/src/index.ts @@ -23,4 +23,3 @@ export * from './api/index.js'; export * from './data-inventory/index.js'; export * from './preference-management/index.js'; export * from './preference-upload/index.js'; -export * from 
'./pooling/index.js'; diff --git a/packages/utils/package.json b/packages/utils/package.json index a184b487..b6bfea6c 100644 --- a/packages/utils/package.json +++ b/packages/utils/package.json @@ -34,6 +34,7 @@ "check:publint": "publint --level warning --strict --pack pnpm" }, "dependencies": { + "@transcend-io/type-utils": "^1.8.9", "bluebird": "^3.7.2", "csv-parse": "^5.6.0", "fast-csv": "^4.3.6" diff --git a/packages/utils/src/index.ts b/packages/utils/src/index.ts index 881011d5..ecc4fa0e 100644 --- a/packages/utils/src/index.ts +++ b/packages/utils/src/index.ts @@ -39,3 +39,4 @@ export * from './retrySamePromise.js'; export * from './chunkOneCsvFile.js'; export * from './bluebird.js'; export * from './splitCsvToList.js'; +export * from './pooling/index.js'; diff --git a/packages/sdk/src/pooling/ensureLogFile.ts b/packages/utils/src/pooling/ensureLogFile.ts similarity index 100% rename from packages/sdk/src/pooling/ensureLogFile.ts rename to packages/utils/src/pooling/ensureLogFile.ts diff --git a/packages/sdk/src/pooling/index.ts b/packages/utils/src/pooling/index.ts similarity index 100% rename from packages/sdk/src/pooling/index.ts rename to packages/utils/src/pooling/index.ts diff --git a/packages/sdk/src/pooling/logRotation.ts b/packages/utils/src/pooling/logRotation.ts similarity index 100% rename from packages/sdk/src/pooling/logRotation.ts rename to packages/utils/src/pooling/logRotation.ts diff --git a/packages/sdk/src/pooling/runPool.ts b/packages/utils/src/pooling/runPool.ts similarity index 99% rename from packages/sdk/src/pooling/runPool.ts rename to packages/utils/src/pooling/runPool.ts index 641cfe8a..b79680f7 100644 --- a/packages/sdk/src/pooling/runPool.ts +++ b/packages/utils/src/pooling/runPool.ts @@ -1,7 +1,7 @@ import type { ChildProcess } from 'node:child_process'; import type { ObjByString } from '@transcend-io/type-utils'; -import { RateCounter } from '@transcend-io/utils'; +import { RateCounter } from '../RateCounter.js'; /* 
eslint-disable max-lines */ import { classifyLogLevel, initLogDir, makeLineSplitter } from './logRotation.js'; diff --git a/packages/sdk/src/pooling/safeGetLogPathsForSlot.ts b/packages/utils/src/pooling/safeGetLogPathsForSlot.ts similarity index 100% rename from packages/sdk/src/pooling/safeGetLogPathsForSlot.ts rename to packages/utils/src/pooling/safeGetLogPathsForSlot.ts diff --git a/packages/sdk/src/pooling/spawnWorkerProcess.ts b/packages/utils/src/pooling/spawnWorkerProcess.ts similarity index 100% rename from packages/sdk/src/pooling/spawnWorkerProcess.ts rename to packages/utils/src/pooling/spawnWorkerProcess.ts diff --git a/packages/sdk/src/pooling/types.ts b/packages/utils/src/pooling/types.ts similarity index 100% rename from packages/sdk/src/pooling/types.ts rename to packages/utils/src/pooling/types.ts diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 49d85f2a..2d4d4c24 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -356,6 +356,9 @@ importers: packages/utils: dependencies: + '@transcend-io/type-utils': + specifier: ^1.8.9 + version: 1.8.9 bluebird: specifier: ^3.7.2 version: 3.7.2 From 41a679477f7dd546885ba6ec79ce21d6f3fdee5c Mon Sep 17 00:00:00 2001 From: Michael Farrell Date: Mon, 30 Mar 2026 17:26:15 -0700 Subject: [PATCH 09/10] rm --- .../scripts/reconcile-preference-records.ts | 846 ------------------ 1 file changed, 846 deletions(-) delete mode 100644 packages/cli/scripts/reconcile-preference-records.ts diff --git a/packages/cli/scripts/reconcile-preference-records.ts b/packages/cli/scripts/reconcile-preference-records.ts deleted file mode 100644 index 0916b0ae..00000000 --- a/packages/cli/scripts/reconcile-preference-records.ts +++ /dev/null @@ -1,846 +0,0 @@ -#!/usr/bin/env node -/* eslint-disable max-len */ -/* eslint-disable jsdoc/require-description,jsdoc/require-returns,jsdoc/require-param-description,@typescript-eslint/no-explicit-any,max-lines,no-continue,no-loop-func,no-param-reassign */ - -import fs from 'node:fs'; -import path 
from 'node:path'; - -import { decodeCodec } from '@transcend-io/type-utils'; -import Bluebird from 'bluebird'; -import cliProgress from 'cli-progress'; -import colors from 'colors'; -import type { Options as CsvParseOptions } from 'csv-parse'; -import { parse as parseCsvSync } from 'csv-parse/sync'; -import type { Got } from 'got'; -import * as t from 'io-ts'; -import { chunk, uniqBy } from 'lodash-es'; - -import { createSombraGotInstance } from '../src/lib/graphql/index.js'; -import { getPreferencesForIdentifiers } from '../src/lib/preference-management/index.js'; -import { logger } from '../src/logger.js'; -// import { extractErrorMessage } from './lib/helpers'; - -const { map } = Bluebird; - -/** - * - */ -type Identifier = { - /** */ name: string /** */; - /** */ - value: string; -}; - -/** - * - */ -type PreferenceRecord = { - /** */ - identifiers?: Identifier[]; - /** */ - purposes?: Array<{ - /** */ - purpose: string; - /** */ - enabled: boolean; - /** */ - preferences?: Array<{ - /** */ - topic?: string; - /** */ - choice?: unknown; - }>; - }>; - [k: string]: unknown; -}; - -/** - * - */ -type Options = { - /** */ - in: string; - /** */ - partition: string; - /** */ - batchSize: number; - /** */ - downloadLogInterval: number; - /** */ - transcendUrl: string; - /** */ - transcendApiKey: string; - /** */ - sombraApiKey?: string; -}; - -/** - * - * @param pathToFile - * @param codec - * @param options - */ -export function readCsv( - pathToFile: string, - codec: T, - options: CsvParseOptions = {}, -): t.TypeOf[] { - const fileContent = parseCsvSync(fs.readFileSync(pathToFile, 'utf-8'), { - columns: true, - relax_column_count: true, - relax_quotes: true, - skip_empty_lines: true, - trim: true, - ...options, - }); - - const data = decodeCodec(t.array(codec), fileContent); - - const parsed = data.map((datum) => - Object.entries(datum).reduce( - (acc, [key, value]) => - Object.assign(acc, { - [key.replace(/[^a-z_.+\-A-Z -~]/g, '')]: value, - }), - {} as any, - ), 
- ); - - return parsed as any; -} - -const OutRowCodec = t.intersection([ - t.type({ - personID: t.string, - transcendID: t.string, - email_withheld: t.string, - }), - t.record(t.string, t.unknown), -]); - -/** - * - */ -type OutRow = t.TypeOf; - -/** - * - */ -type RowMetrics = { - /** */ - lookupBy: 'transcendID' | 'personID'; - /** */ - lookupValue: string; - - /** Total records AFTER unique-by-fingerprint */ - totalRecords: number; - - /** Total records BEFORE unique-by-fingerprint */ - totalRecordsRaw: number; - - /** */ - email: string; - /** */ - emailCount: number; - /** */ - multiEmail: boolean; - - /** */ - distinctVariants: number; - /** */ - largestVariantCount: number; - /** */ - identicalRecordCount: number; - /** */ - allIdentical: boolean; - - /** */ - isDuplicateRow: boolean; - /** */ - dupOfRowIndex: number; - - /** JSON dump of RAW records (before unique) */ - recordsJson: string; - /** */ - recordsJsonTruncated: boolean; - - /** */ - runAttempted: boolean; - /** */ - runUpdated: boolean; - /** */ - runUpdateIdentifier: 'transcendID' | 'email' | ''; - /** */ - runError: string; -}; - -/** - * - * @param record - */ -function fingerprintRecord(record: PreferenceRecord): string { - const ids = (record.identifiers ?? []) - .filter((x) => x?.name && x?.value) - .map((x) => ({ name: String(x.name), value: String(x.value) })) - .sort((a, b) => - a.name === b.name ? a.value.localeCompare(b.value) : a.name.localeCompare(b.name), - ); - - const purposes = (record.purposes ?? []) - .map((p) => { - const prefs = (p.preferences ?? []) - .map((pr) => ({ topic: pr.topic ?? '', choice: pr.choice ?? 
null })) - .sort((a, b) => String(a.topic).localeCompare(String(b.topic))); - - return { purpose: p.purpose, enabled: !!p.enabled, preferences: prefs }; - }) - .sort((a, b) => String(a.purpose).localeCompare(String(b.purpose))); - - return JSON.stringify({ identifiers: ids, purposes }); -} - -/** - * - * @param records - */ -function getUniqueEmails(records: PreferenceRecord[]): string[] { - const set = new Set(); - for (const r of records) { - for (const id of r.identifiers ?? []) { - if (id?.name === 'email' && id.value) { - const v = String(id.value).trim(); - if (v) set.add(v); - } - } - } - return Array.from(set).sort((a, b) => a.localeCompare(b)); -} - -/** - * Build lookup map with duplicates preserved. - * - * @param records - */ -function buildLookupMapAll(records: PreferenceRecord[]): Map { - const m = new Map(); - for (const r of records) { - for (const id of r.identifiers ?? []) { - if (!id?.name || !id?.value) continue; - const k = `${id.name}:${id.value}`; - const arr = m.get(k) ?? []; - arr.push(r); - m.set(k, arr); - } - } - return m; -} - -/** - * - * @param records - */ -function uniqueByFingerprint(records: PreferenceRecord[]): PreferenceRecord[] { - const seen = new Set(); - const out: PreferenceRecord[] = []; - for (const r of records) { - const fp = fingerprintRecord(r); - if (seen.has(fp)) continue; - seen.add(fp); - out.push(r); - } - return out; -} - -/** - * - * @param records - */ -function countRawVsUniqueByFingerprint(records: PreferenceRecord[]): { - /** */ - raw: number; - /** */ - unique: number; -} { - const seen = new Set(); - for (const r of records) seen.add(fingerprintRecord(r)); - return { raw: records.length, unique: seen.size }; -} - -/** - * Fetch preferences for NON-shared identifiers (transcendID/email) in a batch, - * caching raw results PER identifier value. - * - * IMPORTANT: Cache stores RAW arrays (not deduped). 
- * - * @param sombra - * @param opts - */ -async function getPreferencesForIdentifiersCachedRaw( - sombra: Got, - opts: { - /** */ - identifiers: Identifier[]; - /** */ - partitionKey: string; - /** */ - logInterval: number; - /** */ - cache: Map; - /** */ - counters: { - /** */ hit: number /** */; - /** */ - miss: number; - }; - }, -): Promise { - if (opts.identifiers.length === 0) return []; - - const toFetch: Identifier[] = []; - const fromCache: PreferenceRecord[] = []; - - for (const id of opts.identifiers) { - const k = `${id.name}:${id.value}`; - const cached = opts.cache.get(k); - if (cached) { - opts.counters.hit += 1; - fromCache.push(...cached); - } else { - opts.counters.miss += 1; - toFetch.push(id); - } - } - - if (toFetch.length === 0) return fromCache; - - const fetched = (await getPreferencesForIdentifiers(sombra, { - identifiers: toFetch, - partitionKey: opts.partitionKey, - concurrency: 50, - logInterval: opts.logInterval, - skipLogging: true, - })) as any as PreferenceRecord[]; - - // Populate cache PER identifier by scanning returned records. - // This preserves duplicates in the cached arrays. - const lookupFetched = buildLookupMapAll(fetched); - for (const id of toFetch) { - const k = `${id.name}:${id.value}`; - opts.cache.set(k, lookupFetched.get(k) ?? []); - } - - return [...fromCache, ...fetched]; -} - -/** - * Shared identifiers (personID) must be queried one-by-one. - * Cache stores RAW arrays per identifier. 
- * - * @param sombra - * @param opts - */ -async function getPreferencesForSharedIdentifiersOneByOneCachedRaw( - sombra: Got, - opts: { - /** */ - identifiers: Identifier[]; - /** */ - partitionKey: string; - /** */ - cache: Map; - /** */ - counters: { - /** */ hit: number /** */; - /** */ - miss: number; - }; - }, -): Promise { - if (opts.identifiers.length === 0) return []; - - const results = await map( - opts.identifiers, - async (identifier) => { - const cacheKey = `${identifier.name}:${identifier.value}`; - const cached = opts.cache.get(cacheKey); - if (cached) { - opts.counters.hit += 1; - return cached; - } - opts.counters.miss += 1; - - const recs = (await getPreferencesForIdentifiers(sombra, { - identifiers: [identifier], - partitionKey: opts.partitionKey, - concurrency: 1, - logInterval: 999999999, - skipLogging: true, - })) as any as PreferenceRecord[]; - - opts.cache.set(cacheKey, recs); - return recs; - }, - { concurrency: 25 }, - ); - - return results.flat(); -} - -/** - * - * @param sombra - * @param args - */ -async function putIdentifierOnly( - sombra: Got, - args: { - /** */ partition: string /** */; - /** */ - identifier: Identifier; - }, -): Promise { - try { - await sombra - .put('v1/preferences', { - json: { - records: [ - { - timestamp: new Date(Date.now() - 365 * 24 * 60 * 60 * 1000).toISOString(), - partition: args.partition, - identifiers: [args.identifier], - }, - ], - skipWorkflowTriggers: true, - }, - }) - .json(); - } catch (e) { - throw new Error(`Failed to put identifier: ${e?.response?.body}`); - } -} - -/** - * - * @param v - */ -function csvEscape(v: string): string { - const s = v ?? 
''; - if (/[",\n\r]/.test(s)) return `"${s.replace(/"/g, '""')}"`; - return s; -} - -/** - * - * @param n - */ -function ms(n: number): string { - if (n < 1000) return `${n}ms`; - return `${(n / 1000).toFixed(2)}s`; -} - -/** - * - */ -async function main(): Promise { - const inputCsv = process.env.INPUT_CSV; - const opts: Options = { - in: path.resolve(inputCsv ?? ''), - partition: process.env.PARTITION ?? '', - batchSize: Number(process.env.BATCH_SIZE ?? '500'), - downloadLogInterval: Number(process.env.DOWNLOAD_LOG_INTERVAL ?? '100'), - transcendUrl: process.env.TRANSCEND_URL ?? '', - transcendApiKey: process.env.TRANSCEND_API_KEY ?? '', - sombraApiKey: process.env.SOMBRA_API_KEY, - }; - - const runEnabled = String(process.env.RUN ?? '').toLowerCase() === 'true'; - const maxJsonChars = Number(process.env.MAX_JSON_CHARS ?? '50000'); - - if (!inputCsv || !opts.partition || !opts.transcendUrl || !opts.transcendApiKey) { - throw new Error( - 'Missing one or more required environment variables: INPUT_CSV, PARTITION, TRANSCEND_URL, TRANSCEND_API_KEY.', - ); - } - - const t0 = Date.now(); - - logger.info(colors.green(`Reading CSV: ${opts.in}`)); - const rows = readCsv(opts.in, OutRowCodec, { columns: true }) as OutRow[]; - - const rawFile = fs.readFileSync(opts.in, 'utf-8'); - const headerLine = rawFile.split(/\r?\n/)[0] ?? 
'personID,transcendID,email_withheld'; - - const extraHeaders = [ - 'lookupBy', - 'lookupValue', - 'totalRecordsRaw', - 'totalRecords', - 'distinctVariants', - 'largestVariantCount', - 'identicalRecordCount', - 'allIdentical', - 'email', - 'emailCount', - 'multiEmail', - 'isDuplicateRow', - 'dupOfRowIndex', - 'recordsJsonTruncated', - 'recordsJson', - 'runAttempted', - 'runUpdated', - 'runUpdateIdentifier', - 'runError', - ]; - - logger.info(colors.green('Creating Sombra client...')); - const sombra = await createSombraGotInstance( - opts.transcendUrl, - opts.transcendApiKey, - opts.sombraApiKey, - ); - - // RAW caches (per identifier value) - const personIdCache = new Map(); - const transcendIdCache = new Map(); - const emailCache = new Map(); - - // duplicate-row tracking - const rowSeen = new Map(); - - const outTmp = `${opts.in}.tmp`; - const writer = fs.createWriteStream(outTmp, { encoding: 'utf8' }); - writer.write(`${headerLine},${extraHeaders.join(',')}\n`); - - const batches = chunk(rows, opts.batchSize); - logger.info( - colors.magenta( - `Processing ${rows.length} rows in ${batches.length} batches (batchSize=${opts.batchSize}) RUN=${runEnabled}`, - ), - ); - - const progressBar = new cliProgress.SingleBar( - { - format: `Rows |${colors.cyan( - '{bar}', - )}| {value}/{total} | {percentage}% | ETA {eta}s | batch {batch}/{batches}`, - }, - cliProgress.Presets.shades_classic, - ); - progressBar.start(rows.length, 0, { batch: 0, batches: batches.length }); - - let processed = 0; - let written = 0; - let dupRows = 0; - - for (let batchIndex = 0; batchIndex < batches.length; batchIndex += 1) { - const batch = batches[batchIndex]; - const batchT0 = Date.now(); - - progressBar.update(processed, { - batch: batchIndex + 1, - batches: batches.length, - }); - - // counters for cache efficiency - const cTrans = { hit: 0, miss: 0 }; - const cEmail = { hit: 0, miss: 0 }; - const cPerson = { hit: 0, miss: 0 }; - - const stage0 = Date.now(); - const transcendIDs: 
Identifier[] = uniqBy( - batch - .map((r) => { - const v = String((r as any).transcendID ?? '').trim(); - return v ? ({ name: 'transcendID', value: v } as Identifier) : null; - }) - .filter(Boolean) as Identifier[], - (x) => `${x.name}:${x.value}`, - ); - - const personIDs: Identifier[] = uniqBy( - batch - .map((r) => { - const v = String((r as any).personID ?? '').trim(); - return v ? ({ name: 'personID', value: v } as Identifier) : null; - }) - .filter(Boolean) as Identifier[], - (x) => `${x.name}:${x.value}`, - ); - - const emails: Identifier[] = uniqBy( - batch - .map((r) => { - const v = String((r as any).email ?? '').trim(); - return v && v.includes('@') ? ({ name: 'email', value: v } as Identifier) : null; - }) - .filter(Boolean) as Identifier[], - (x) => `${x.name}:${x.value}`, - ); - - const idBuildMs = Date.now() - stage0; - - // FETCH RAW (no dedupe here) - const stage1 = Date.now(); - const [recordsByTranscendRaw, recordsByPersonRaw, recordsByEmailRaw] = await Promise.all([ - getPreferencesForIdentifiersCachedRaw(sombra, { - identifiers: transcendIDs, - partitionKey: opts.partition, - logInterval: opts.downloadLogInterval, - cache: transcendIdCache, - counters: cTrans, - }), - getPreferencesForSharedIdentifiersOneByOneCachedRaw(sombra, { - identifiers: personIDs, - partitionKey: opts.partition, - cache: personIdCache, - counters: cPerson, - }), - getPreferencesForIdentifiersCachedRaw(sombra, { - identifiers: emails, - partitionKey: opts.partition, - logInterval: opts.downloadLogInterval, - cache: emailCache, - counters: cEmail, - }), - ]); - const fetchMs = Date.now() - stage1; - - // Batch-level “how many dupes” metrics (raw vs unique-by-fingerprint) - const tCounts = countRawVsUniqueByFingerprint(recordsByTranscendRaw); - const pCounts = countRawVsUniqueByFingerprint(recordsByPersonRaw); - const eCounts = countRawVsUniqueByFingerprint(recordsByEmailRaw); - - const allRaw = [ - ...(recordsByTranscendRaw as PreferenceRecord[]), - ...(recordsByPersonRaw 
as PreferenceRecord[]), - ...(recordsByEmailRaw as PreferenceRecord[]), - ]; - const allCounts = countRawVsUniqueByFingerprint(allRaw); - - // Lookup map MUST preserve duplicates so per-row raw length is meaningful - const lookupMapRaw = buildLookupMapAll(allRaw); - - // ROW PROCESSING - const stage2 = Date.now(); - const results = await map( - batch, - async (r, idxInBatch) => { - const rowIndex = batchIndex * opts.batchSize + idxInBatch + 1; - - const personID = String((r as any).personID ?? '').trim(); - const transcendID = String((r as any).transcendID ?? '').trim(); - const emailWithheld = String((r as any).email_withheld ?? '').trim(); - const email = String((r as any).email ?? '').trim(); - - const lookupBy: 'transcendID' | 'personID' = transcendID ? 'transcendID' : 'personID'; - const lookupValue = transcendID || personID || ''; - - const rowKey = `${personID}||${transcendID}||${emailWithheld}`; - const firstSeenAt = rowSeen.get(rowKey) ?? 0; - const isDup = firstSeenAt > 0; - if (!isDup) rowSeen.set(rowKey, rowIndex); - else dupRows += 1; - - const metrics: RowMetrics = { - lookupBy, - lookupValue, - totalRecordsRaw: 0, - totalRecords: 0, - - email: '', - emailCount: 0, - multiEmail: false, - - distinctVariants: 0, - largestVariantCount: 0, - identicalRecordCount: 0, - allIdentical: false, - - isDuplicateRow: isDup, - dupOfRowIndex: isDup ? firstSeenAt : 0, - - recordsJson: '', - recordsJsonTruncated: false, - - runAttempted: false, - runUpdated: false, - runUpdateIdentifier: '', - runError: '', - }; - - if (!lookupValue) { - metrics.runError = 'Missing both transcendID and personID'; - return { personID, transcendID, emailWithheld, metrics }; - } - - // RAW matches (duplicates preserved) - const recsRaw: PreferenceRecord[] = [ - ...(transcendID ? (lookupMapRaw.get(`transcendID:${transcendID}`) ?? []) : []), - // ...(personID ? lookupMapRaw.get(`personID:${personID}`) ?? [] : []), - ...(email && email.includes('@') ? (lookupMapRaw.get(`email:${email}`) ?? 
[]) : []), - ]; - - metrics.totalRecordsRaw = recsRaw.length; - - // Unique-by-fingerprint (this is where you measure “how many dupes”) - const recsUnique = uniqueByFingerprint(recsRaw); - metrics.totalRecords = recsUnique.length; - - // email source of truth from UNIQUE set - const emailsFound = getUniqueEmails(recsUnique); - metrics.emailCount = emailsFound.length; - metrics.multiEmail = emailsFound.length > 1; - metrics.email = emailsFound.length === 1 ? emailsFound[0] : ''; - - // variant stats on UNIQUE set - if (recsUnique.length > 0) { - metrics.distinctVariants = recsUnique.length; // because recsUnique is unique by fingerprint - metrics.largestVariantCount = 1; - metrics.identicalRecordCount = 1; - metrics.allIdentical = recsUnique.length === 1; - } - - // JSON dump of RAW records (pre-unique) - let json = ''; - try { - json = JSON.stringify(recsRaw); - } catch (e: any) { - json = JSON.stringify({ - error: 'Failed to stringify recsRaw', - message: e?.message ?? String(e), - }); - } - if (maxJsonChars > 0 && json.length > maxJsonChars) { - metrics.recordsJsonTruncated = true; - metrics.recordsJson = json.slice(0, maxJsonChars); - } else { - metrics.recordsJson = json; - } - - if (runEnabled) { - metrics.runAttempted = true; - try { - if (transcendID) { - await putIdentifierOnly(sombra, { - partition: opts.partition, - identifier: { name: 'transcendID', value: transcendID }, - }); - metrics.runUpdated = true; - metrics.runUpdateIdentifier = 'transcendID'; - } else if (emailsFound.length === 1) { - await putIdentifierOnly(sombra, { - partition: opts.partition, - identifier: { name: 'email', value: emailsFound[0] }, - }); - metrics.runUpdated = true; - metrics.runUpdateIdentifier = 'email'; - } else if (emailsFound.length === 0) { - metrics.runError = - 'RUN enabled but no transcendID and no email found in existing records'; - } else { - metrics.runError = `RUN enabled but multiple emails found (${emailsFound.length})`; - } - } catch (err: any) { - 
metrics.runError = err?.message ?? String(err); - } - if (metrics.runError) { - logger.warn(colors.yellow(`Row ${rowIndex} update error: ${metrics.runError}`)); - } - } - - return { personID, transcendID, emailWithheld, metrics }; - }, - { concurrency: 50 }, - ); - const processMs = Date.now() - stage2; - - // WRITE (always write 1 output row per input row) - const stage3 = Date.now(); - for (const { personID, transcendID, emailWithheld, metrics } of results as any) { - writer.write( - `${[ - csvEscape(personID), - csvEscape(transcendID), - csvEscape(emailWithheld), - - csvEscape(metrics.lookupBy), - csvEscape(metrics.lookupValue), - csvEscape(String(metrics.totalRecordsRaw)), - csvEscape(String(metrics.totalRecords)), - csvEscape(String(metrics.distinctVariants)), - csvEscape(String(metrics.largestVariantCount)), - csvEscape(String(metrics.identicalRecordCount)), - csvEscape(String(metrics.allIdentical)), - - csvEscape(metrics.email), - csvEscape(String(metrics.emailCount)), - csvEscape(String(metrics.multiEmail)), - - csvEscape(String(metrics.isDuplicateRow)), - csvEscape(String(metrics.dupOfRowIndex)), - - csvEscape(String(metrics.recordsJsonTruncated)), - csvEscape(metrics.recordsJson), - - csvEscape(String(metrics.runAttempted)), - csvEscape(String(metrics.runUpdated)), - csvEscape(metrics.runUpdateIdentifier), - csvEscape(metrics.runError), - ].join(',')}\n`, - ); - written += 1; - } - const writeMs = Date.now() - stage3; - - processed += batch.length; - progressBar.update(processed, { - batch: batchIndex + 1, - batches: batches.length, - }); - - const batchMs = Date.now() - batchT0; - - // This is the key log you want: raw vs unique before any dedupe. 
- logger.info( - colors.green( - `Batch ${batchIndex + 1}/${batches.length} rows=${ - batch.length - } written=${written} dupRows=${dupRows} ` + - `| ids: tID=${transcendIDs.length} email=${emails.length} personID=${personIDs.length} ` + - `| fetched(raw/uniq): tID=${tCounts.raw}/${tCounts.unique} ` + - `email=${eCounts.raw}/${eCounts.unique} personID=${pCounts.raw}/${pCounts.unique} ` + - `ALL=${allCounts.raw}/${allCounts.unique} ` + - `| cache(hit/miss): tID=${cTrans.hit}/${cTrans.miss} email=${cEmail.hit}/${cEmail.miss} personID=${cPerson.hit}/${cPerson.miss} ` + - `| timing: build=${ms(idBuildMs)} fetch=${ms(fetchMs)} process=${ms( - processMs, - )} write=${ms(writeMs)} total=${ms(batchMs)}`, - ), - ); - } - - progressBar.update(rows.length); - progressBar.stop(); - - await new Promise((resolve, reject) => { - writer.end(() => resolve()); - writer.on('error', reject); - }); - - fs.renameSync(outTmp, opts.in); - - const totalMs = Date.now() - t0; - logger.info( - colors.magenta( - `Done. Wrote ${written}/${rows.length} rows (dupRows=${dupRows}) to "${ - opts.in - }" in ${ms(totalMs)}.`, - ), - ); -} - -main().catch((err) => { - logger.error(colors.red(err?.stack ?? 
String(err))); - process.exit(1); -}); -/* eslint-enable jsdoc/require-description,jsdoc/require-returns,jsdoc/require-param-description,@typescript-eslint/no-explicit-any,max-lines,no-continue,no-loop-func,no-param-reassign */ -/* eslint-enable max-len */ From 4e96a28a0089809c76dedf3207346ec08c7e83a2 Mon Sep 17 00:00:00 2001 From: Michael Farrell Date: Mon, 30 Mar 2026 17:28:09 -0700 Subject: [PATCH 10/10] rev --- packages/utils/src/pooling/tests/buildExportStatus.test.ts | 3 ++- packages/utils/src/pooling/tests/classifyLogLevel.test.ts | 3 ++- packages/utils/src/pooling/tests/ensureLogFile.test.ts | 3 ++- packages/utils/src/pooling/tests/extractBlocks.test.ts | 3 ++- packages/utils/src/pooling/tests/isIpcOpen.test.ts | 3 ++- packages/utils/src/pooling/tests/isLogError.test.ts | 3 ++- packages/utils/src/pooling/tests/isLogNewHeader.test.ts | 3 ++- packages/utils/src/pooling/tests/isLogWarn.test.ts | 3 ++- packages/utils/src/pooling/tests/makeLineSplitter.test.ts | 3 ++- packages/utils/src/pooling/tests/safeSend.test.ts | 3 ++- 10 files changed, 20 insertions(+), 10 deletions(-) diff --git a/packages/utils/src/pooling/tests/buildExportStatus.test.ts b/packages/utils/src/pooling/tests/buildExportStatus.test.ts index 6e2438b6..7a42fd3f 100644 --- a/packages/utils/src/pooling/tests/buildExportStatus.test.ts +++ b/packages/utils/src/pooling/tests/buildExportStatus.test.ts @@ -1,6 +1,7 @@ -import { buildExportStatus } from '@transcend-io/utils'; import { describe, it, expect } from 'vitest'; +import { buildExportStatus } from '../logRotation.js'; + describe('buildExportStatus', () => { it('returns expected paths for all export artifacts', () => { const out = buildExportStatus('/logs'); diff --git a/packages/utils/src/pooling/tests/classifyLogLevel.test.ts b/packages/utils/src/pooling/tests/classifyLogLevel.test.ts index 6f943375..cd496ca5 100644 --- a/packages/utils/src/pooling/tests/classifyLogLevel.test.ts +++ 
b/packages/utils/src/pooling/tests/classifyLogLevel.test.ts @@ -1,6 +1,7 @@ -import { classifyLogLevel } from '@transcend-io/utils'; import { describe, it, expect } from 'vitest'; +import { classifyLogLevel } from '../logRotation.js'; + describe('classifyLogLevel', () => { it('detects explicit worker tags', () => { expect(classifyLogLevel('[w12] WARN something')).toBe('warn'); diff --git a/packages/utils/src/pooling/tests/ensureLogFile.test.ts b/packages/utils/src/pooling/tests/ensureLogFile.test.ts index 2ad35c9f..31eb50f4 100644 --- a/packages/utils/src/pooling/tests/ensureLogFile.test.ts +++ b/packages/utils/src/pooling/tests/ensureLogFile.test.ts @@ -1,8 +1,9 @@ import { existsSync, openSync, closeSync } from 'node:fs'; -import { ensureLogFile } from '@transcend-io/utils'; import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { ensureLogFile } from '../ensureLogFile.js'; + /** * Mock fs BEFORE importing the SUT. * Inline factory avoids Vitest hoisting pitfalls. diff --git a/packages/utils/src/pooling/tests/extractBlocks.test.ts b/packages/utils/src/pooling/tests/extractBlocks.test.ts index ba6844d5..57090de3 100644 --- a/packages/utils/src/pooling/tests/extractBlocks.test.ts +++ b/packages/utils/src/pooling/tests/extractBlocks.test.ts @@ -1,6 +1,7 @@ -import { extractBlocks } from '@transcend-io/utils'; import { describe, it, expect } from 'vitest'; +import { extractBlocks } from '../logRotation.js'; + /** * Blocks should start when `starts(cleanLine)` returns true, * continue until a blank line or a new header-like line, diff --git a/packages/utils/src/pooling/tests/isIpcOpen.test.ts b/packages/utils/src/pooling/tests/isIpcOpen.test.ts index 1dbf6d60..7e56ac79 100644 --- a/packages/utils/src/pooling/tests/isIpcOpen.test.ts +++ b/packages/utils/src/pooling/tests/isIpcOpen.test.ts @@ -1,8 +1,9 @@ import type { ChildProcess } from 'node:child_process'; -import { isIpcOpen } from '@transcend-io/utils'; import { describe, it, expect } from 
'vitest'; +import { isIpcOpen } from '../spawnWorkerProcess.js'; + /** * Build a minimal ChildProcess-like object for isIpcOpen tests. * diff --git a/packages/utils/src/pooling/tests/isLogError.test.ts b/packages/utils/src/pooling/tests/isLogError.test.ts index 129bbc84..78d24ddd 100644 --- a/packages/utils/src/pooling/tests/isLogError.test.ts +++ b/packages/utils/src/pooling/tests/isLogError.test.ts @@ -1,6 +1,7 @@ -import { isLogError } from '@transcend-io/utils'; import { describe, it, expect } from 'vitest'; +import { isLogError } from '../logRotation.js'; + describe('isLogError', () => { it('matches ERROR and runtime fatal indicators', () => { expect(isLogError('something ERROR happened')).toBe(true); diff --git a/packages/utils/src/pooling/tests/isLogNewHeader.test.ts b/packages/utils/src/pooling/tests/isLogNewHeader.test.ts index 7e8bc8a2..b5c9d2ef 100644 --- a/packages/utils/src/pooling/tests/isLogNewHeader.test.ts +++ b/packages/utils/src/pooling/tests/isLogNewHeader.test.ts @@ -1,6 +1,7 @@ -import { isLogNewHeader } from '@transcend-io/utils'; import { describe, it, expect } from 'vitest'; +import { isLogNewHeader } from '../logRotation.js'; + describe('isLogNewHeader', () => { it('is true for errors, warnings, worker tags, and ISO timestamps', () => { expect(isLogNewHeader('ERROR boom')).toBe(true); diff --git a/packages/utils/src/pooling/tests/isLogWarn.test.ts b/packages/utils/src/pooling/tests/isLogWarn.test.ts index 22bd5ded..be271bfa 100644 --- a/packages/utils/src/pooling/tests/isLogWarn.test.ts +++ b/packages/utils/src/pooling/tests/isLogWarn.test.ts @@ -1,6 +1,7 @@ -import { isLogWarn } from '@transcend-io/utils'; import { describe, it, expect } from 'vitest'; +import { isLogWarn } from '../logRotation.js'; + describe('isLogWarn', () => { it('matches WARN/WARNING case-insensitively', () => { expect(isLogWarn('WARN disk low')).toBe(true); diff --git a/packages/utils/src/pooling/tests/makeLineSplitter.test.ts 
b/packages/utils/src/pooling/tests/makeLineSplitter.test.ts index 1eb62513..b538e7d2 100644 --- a/packages/utils/src/pooling/tests/makeLineSplitter.test.ts +++ b/packages/utils/src/pooling/tests/makeLineSplitter.test.ts @@ -1,6 +1,7 @@ -import { makeLineSplitter } from '@transcend-io/utils'; import { describe, it, expect } from 'vitest'; +import { makeLineSplitter } from '../logRotation.js'; + describe('makeLineSplitter', () => { it('emits one line per newline across chunk boundaries', () => { const lines: string[] = []; diff --git a/packages/utils/src/pooling/tests/safeSend.test.ts b/packages/utils/src/pooling/tests/safeSend.test.ts index 3d81eb51..1370872d 100644 --- a/packages/utils/src/pooling/tests/safeSend.test.ts +++ b/packages/utils/src/pooling/tests/safeSend.test.ts @@ -1,8 +1,9 @@ import type { ChildProcess } from 'node:child_process'; -import { safeSend } from '@transcend-io/utils'; import { describe, it, expect, vi } from 'vitest'; +import { safeSend } from '../spawnWorkerProcess.js'; + /** * Make a ChildProcess-shaped object. *