From 836202aab5f954c80da06f6a081d68bd35d487fb Mon Sep 17 00:00:00 2001 From: anotherminh Date: Wed, 4 Mar 2026 15:37:30 -0500 Subject: [PATCH 1/7] Support cursor pagination for fetching requestIdentifiers --- README.md | 11 ++++++++- package.json | 2 +- .../fetchAllRequestIdentifierMetadata.ts | 19 ++++++++++----- src/lib/graphql/fetchAllRequestIdentifiers.ts | 24 ++++++++++++------- src/lib/graphql/gqls/RequestIdentifier.ts | 10 ++++++-- 5 files changed, 48 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index c6aafabe..ddfdd3c4 100644 --- a/README.md +++ b/README.md @@ -1075,7 +1075,7 @@ transcend request export --auth="$TRANSCEND_API_KEY" --concurrency=500 transcend request export --auth="$TRANSCEND_API_KEY" --showTests=false ``` -**Filter for requests within a date range** +**Filter for requests created within a date range** ```sh transcend request export \ @@ -1084,6 +1084,15 @@ transcend request export \ --createdAtAfter=2025-02-21T00:00:00.000Z ``` +**Filter for requests last updated within a date range** + +```sh +transcend request export \ + --auth="$TRANSCEND_API_KEY" \ + --updatedAtBefore=2025-04-05T00:00:00.000Z \ + --updatedAtAfter=2025-02-21T00:00:00.000Z +``` + **Write to a specific file location** ```sh diff --git a/package.json b/package.json index 74b2dd1b..d2c0f8ae 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "author": "Transcend Inc.", "name": "@transcend-io/cli", "description": "A command line interface for programmatic operations across Transcend.", - "version": "8.38.0", + "version": "8.39.0", "homepage": "https://github.com/transcend-io/cli", "repository": { "type": "git", diff --git a/src/lib/graphql/fetchAllRequestIdentifierMetadata.ts b/src/lib/graphql/fetchAllRequestIdentifierMetadata.ts index f9a7395f..0bbc2227 100644 --- a/src/lib/graphql/fetchAllRequestIdentifierMetadata.ts +++ b/src/lib/graphql/fetchAllRequestIdentifierMetadata.ts @@ -11,7 +11,7 @@ export interface RequestIdentifierMetadata { isVerifiedAtLeastOnce: boolean; } -const PAGE_SIZE = 50; +const PAGE_SIZE = 2000; /** * Fetch all request identifier metadata for a particular request @@ -41,22 +41,29 @@ export async function fetchAllRequestIdentifierMetadata( const resolvedRequestIds = requestIds ?? (requestId ? [requestId] : undefined); const requestIdentifiers: RequestIdentifierMetadata[] = []; - let offset = 0; + let cursor: string | undefined; // Paginate let shouldContinue = false; do { const { - requestIdentifiers: { nodes }, + requestIdentifiers: { nodes, pageInfo }, } = await makeGraphQLRequest<{ /** Request Identifiers */ requestIdentifiers: { /** List */ nodes: RequestIdentifierMetadata[]; + /** Pagination info */ + pageInfo: { + /** Cursor for the last item */ + endCursor: string | null; + /** Whether more pages exist */ + hasNextPage: boolean; + }; }; }>(client, REQUEST_IDENTIFIERS, { first: PAGE_SIZE, - offset, + after: cursor, requestIds: resolvedRequestIds, updatedAtBefore: updatedAtBefore ? updatedAtBefore.toISOString() @@ -64,8 +71,8 @@ export async function fetchAllRequestIdentifierMetadata( updatedAtAfter: updatedAtAfter ? updatedAtAfter.toISOString() : undefined, }); requestIdentifiers.push(...nodes); - offset += PAGE_SIZE; - shouldContinue = nodes.length === PAGE_SIZE; + cursor = pageInfo.endCursor ?? undefined; + shouldContinue = pageInfo.hasNextPage; } while (shouldContinue); return requestIdentifiers; diff --git a/src/lib/graphql/fetchAllRequestIdentifiers.ts b/src/lib/graphql/fetchAllRequestIdentifiers.ts index ba03ee99..3cca3db1 100644 --- a/src/lib/graphql/fetchAllRequestIdentifiers.ts +++ b/src/lib/graphql/fetchAllRequestIdentifiers.ts @@ -24,10 +24,16 @@ const RequestIdentifier = t.type({ /** Type override */ export type RequestIdentifier = t.TypeOf; -const PAGE_SIZE = 50; +const PAGE_SIZE = 100; + +const PageInfo = t.type({ + endCursor: t.union([t.string, t.null]), + hasNextPage: t.boolean, +}); export const RequestIdentifiersResponse = t.type({ identifiers: t.array(RequestIdentifier), + pageInfo: PageInfo, }); /** @@ -45,11 +51,11 @@ export async function fetchAllRequestIdentifiers( requestId, }: { /** ID of request to filter on */ - requestId: string; - }, + requestId?: string; + } = {}, ): Promise { const requestIdentifiers: RequestIdentifier[] = []; - let offset = 0; + let cursor: string | undefined; let shouldContinue = false; // determine sombra version @@ -81,10 +87,12 @@ export async function fetchAllRequestIdentifiers( .post<{ /** Decrypted identifiers */ identifiers: RequestIdentifier[]; + /** Pagination info */ + pageInfo: { endCursor: string | null; hasNextPage: boolean }; }>('v1/request-identifiers', { json: { first: PAGE_SIZE, - offset, + after: cursor, requestId, }, }) @@ -97,15 +105,15 @@ export async function fetchAllRequestIdentifiers( ); } - const { identifiers: nodes } = decodeCodec( + const { identifiers: nodes, pageInfo } = decodeCodec( RequestIdentifiersResponse, response, ); requestIdentifiers.push(...nodes); - offset += PAGE_SIZE; - shouldContinue = nodes.length === PAGE_SIZE; + cursor = pageInfo.endCursor ?? undefined; + shouldContinue = pageInfo.hasNextPage; } while (shouldContinue); return requestIdentifiers; diff --git a/src/lib/graphql/gqls/RequestIdentifier.ts b/src/lib/graphql/gqls/RequestIdentifier.ts index 466a28fc..5ff7e83c 100644 --- a/src/lib/graphql/gqls/RequestIdentifier.ts +++ b/src/lib/graphql/gqls/RequestIdentifier.ts @@ -15,7 +15,7 @@ export const REMOVE_REQUEST_IDENTIFIERS = gql` export const REQUEST_IDENTIFIERS = gql` query TranscendCliRequestIdentifiers( $first: Int! - $offset: Int! + $after: String $requestIds: [ID!] $updatedAtBefore: Date $updatedAtAfter: Date @@ -23,11 +23,13 @@ export const REQUEST_IDENTIFIERS = gql` requestIdentifiers( input: { requestIds: $requestIds + } + filterBy: { updatedAtBefore: $updatedAtBefore updatedAtAfter: $updatedAtAfter } first: $first - offset: $offset + after: $after useMaster: false orderBy: [ { field: createdAt, direction: ASC } @@ -40,6 +42,10 @@ export const REQUEST_IDENTIFIERS = gql` isVerifiedAtLeastOnce } totalCount + pageInfo { + endCursor + hasNextPage + } } } `; From c380d97ae4f2f9f331a0d03f8fff843148150e58 Mon Sep 17 00:00:00 2001 From: anotherminh Date: Wed, 4 Mar 2026 15:41:25 -0500 Subject: [PATCH 2/7] ci --- README.md | 11 +---------- src/lib/graphql/gqls/RequestIdentifier.ts | 4 +--- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index ddfdd3c4..c6aafabe 100644 --- a/README.md +++ b/README.md @@ -1075,7 +1075,7 @@ transcend request export --auth="$TRANSCEND_API_KEY" --concurrency=500 transcend request export --auth="$TRANSCEND_API_KEY" --showTests=false ``` -**Filter for requests created within a date range** +**Filter for requests within a date range** ```sh transcend request export \ @@ -1084,15 +1084,6 @@ transcend request export \ --createdAtAfter=2025-02-21T00:00:00.000Z ``` -**Filter for requests last updated within a date range** - -```sh -transcend request export \ - --auth="$TRANSCEND_API_KEY" \ - --updatedAtBefore=2025-04-05T00:00:00.000Z \ - --updatedAtAfter=2025-02-21T00:00:00.000Z -``` - **Write to a specific file location** ```sh diff --git a/src/lib/graphql/gqls/RequestIdentifier.ts b/src/lib/graphql/gqls/RequestIdentifier.ts index 5ff7e83c..4ed3970f 100644 --- a/src/lib/graphql/gqls/RequestIdentifier.ts +++ b/src/lib/graphql/gqls/RequestIdentifier.ts @@ -21,9 +21,7 @@ export const REQUEST_IDENTIFIERS = gql` $updatedAtAfter: Date ) { requestIdentifiers( - input: { - requestIds: $requestIds - } + input: { requestIds: $requestIds } filterBy: { updatedAtBefore: $updatedAtBefore updatedAtAfter: $updatedAtAfter From 53233e9342601d828c3e56b6613590748004d59b Mon Sep 17 00:00:00 2001 From: anotherminh Date: Wed, 4 Mar 2026 15:58:48 -0500 Subject: [PATCH 3/7] lint --- src/lib/graphql/fetchAllRequestIdentifiers.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/lib/graphql/fetchAllRequestIdentifiers.ts b/src/lib/graphql/fetchAllRequestIdentifiers.ts index 3cca3db1..c902e358 100644 --- a/src/lib/graphql/fetchAllRequestIdentifiers.ts +++ b/src/lib/graphql/fetchAllRequestIdentifiers.ts @@ -88,7 +88,12 @@ export async function fetchAllRequestIdentifiers( /** Decrypted identifiers */ identifiers: RequestIdentifier[]; /** Pagination info */ - pageInfo: { endCursor: string | null; hasNextPage: boolean }; + pageInfo: { + /** Cursor for the last item */ + endCursor: string | null; + /** Whether more pages exist */ + hasNextPage: boolean; + }; }>('v1/request-identifiers', { json: { first: PAGE_SIZE, @@ -99,8 +104,7 @@ export async function fetchAllRequestIdentifiers( .json(); } catch (err) { throw new Error( - `Failed to fetch request identifiers: ${ - err?.response?.body || err?.message + `Failed to fetch request identifiers: ${err?.response?.body || err?.message }`, ); } From 839a32fac5c6ea13407daa380fc8db0c22a6c20e Mon Sep 17 00:00:00 2001 From: anotherminh Date: Wed, 4 Mar 2026 16:12:24 -0500 Subject: [PATCH 4/7] ci --- src/lib/graphql/fetchAllRequestIdentifiers.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib/graphql/fetchAllRequestIdentifiers.ts b/src/lib/graphql/fetchAllRequestIdentifiers.ts index c902e358..8fdda940 100644 --- a/src/lib/graphql/fetchAllRequestIdentifiers.ts +++ b/src/lib/graphql/fetchAllRequestIdentifiers.ts @@ -104,7 +104,8 @@ export async function fetchAllRequestIdentifiers( .json(); } catch (err) { throw new Error( - `Failed to fetch request identifiers: ${err?.response?.body || err?.message + `Failed to fetch request identifiers: ${ + err?.response?.body || err?.message }`, ); } From 76d655db54939be4c547a33fb5502947db9da03f Mon Sep 17 00:00:00 2001 From: anotherminh Date: Fri, 6 Mar 2026 09:37:17 -0500 Subject: [PATCH 5/7] DRY --- package.json | 6 +- src/lib/graphql/fetchAllRequestIdentifiers.ts | 10 +-- src/lib/requests/index.ts | 1 + src/lib/requests/pullPrivacyRequests.ts | 84 +++++++------------ src/lib/requests/splitDateRange.ts | 30 +++++++ .../requests/streamPrivacyRequestsToCsv.ts | 32 +------ 6 files changed, 66 insertions(+), 97 deletions(-) create mode 100644 src/lib/requests/splitDateRange.ts diff --git a/package.json b/package.json index 168e396a..f7b53fee 100644 --- a/package.json +++ b/package.json @@ -2,11 +2,7 @@ "author": "Transcend Inc.", "name": "@transcend-io/cli", "description": "A command line interface for programmatic operations across Transcend.", -<<<<<<< HEAD - "version": "8.39.0", -======= - "version": "8.38.2", ->>>>>>> main + "version": "8.38.3", "homepage": "https://github.com/transcend-io/cli", "repository": { "type": "git", diff --git a/src/lib/graphql/fetchAllRequestIdentifiers.ts b/src/lib/graphql/fetchAllRequestIdentifiers.ts index f96e0972..ba112768 100644 --- a/src/lib/graphql/fetchAllRequestIdentifiers.ts +++ b/src/lib/graphql/fetchAllRequestIdentifiers.ts @@ -90,14 +90,14 @@ export async function fetchAllRequestIdentifiers( }, ): Promise { const requestIdentifiers: RequestIdentifier[] = []; - let offset = 0; - let shouldContinue = false; + let cursor: string | undefined; + let shouldContinue = true; if (!skipSombraCheck) { await validateSombraVersion(client); } - do { + while (shouldContinue) { let response: unknown; try { response = await sombra! @@ -114,7 +114,7 @@ export async function fetchAllRequestIdentifiers( }>('v1/request-identifiers', { json: { first: PAGE_SIZE, - after: cursor, + after: cursor ?? undefined, requestId, }, }) @@ -135,7 +135,7 @@ export async function fetchAllRequestIdentifiers( cursor = pageInfo.endCursor ?? undefined; shouldContinue = pageInfo.hasNextPage; - } while (shouldContinue); + } return requestIdentifiers; } diff --git a/src/lib/requests/index.ts b/src/lib/requests/index.ts index f027453f..a7fdecf5 100644 --- a/src/lib/requests/index.ts +++ b/src/lib/requests/index.ts @@ -31,3 +31,4 @@ export * from './pullPrivacyRequests'; export * from './streamPrivacyRequestsToCsv'; export * from './skipRequestDataSilos'; export * from './removeUnverifiedRequestIdentifiers'; +export * from './splitDateRange'; diff --git a/src/lib/requests/pullPrivacyRequests.ts b/src/lib/requests/pullPrivacyRequests.ts index 78b2b4a4..ba0ad586 100644 --- a/src/lib/requests/pullPrivacyRequests.ts +++ b/src/lib/requests/pullPrivacyRequests.ts @@ -17,33 +17,7 @@ import { CsvRow, ExportedPrivacyRequest, } from './formatRequestForCsv'; - -/** - * Split a date range into N evenly-spaced chunks. - * - * @param after - Start of the date range - * @param before - End of the date range - * @param chunks - Number of chunks to split into - * @returns Array of date range bounds - */ -function splitDateRange( - after: Date, - before: Date, - chunks: number, -): { - /** Chunk start */ createdAtAfter: Date; - /** Chunk end */ createdAtBefore: Date; -}[] { - const /** Range start ms */ start = after.getTime(); - const /** Range end ms */ end = before.getTime(); - const /** Ms per chunk */ chunkSize = (end - start) / chunks; - return Array.from({ length: chunks }, (_, i) => ({ - createdAtAfter: new Date(start + chunkSize * i), - createdAtBefore: new Date( - i === chunks - 1 ? end : start + chunkSize * (i + 1), - ), - })); -} +import { splitDateRange } from './splitDateRange'; /** * Pull down a list of privacy requests @@ -110,16 +84,14 @@ export async function pullPrivacyRequests({ dateRange += ` before ${createdAtBefore.toISOString()}`; } if (createdAtAfter) { - dateRange += `${ - dateRange ? ', and' : '' - } after ${createdAtAfter.toISOString()}`; + dateRange += `${dateRange ? ', and' : '' + } after ${createdAtAfter.toISOString()}`; } logger.info( colors.magenta( - `${ - actions.length > 0 - ? `Pulling requests of type "${actions.join('" , "')}"` - : 'Pulling all requests' + `${actions.length > 0 + ? `Pulling requests of type "${actions.join('" , "')}"` + : 'Pulling all requests' }${dateRange}`, ), ); @@ -164,29 +136,29 @@ export async function pullPrivacyRequests({ // Fetch the request identifiers for those requests const requestsWithRequestIdentifiers = skipRequestIdentifiers ? requests.map((request) => ({ - ...request, - requestIdentifiers: [] as RequestIdentifier[], - })) + ...request, + requestIdentifiers: [] as RequestIdentifier[], + })) : await map( - requests, - async (request) => { - const requestIdentifiers = await fetchAllRequestIdentifiers( - client, - sombra, - { - requestId: request.id, - skipSombraCheck: true, - }, - ); - return { - ...request, - requestIdentifiers, - }; - }, - { - concurrency: pageLimit, - }, - ); + requests, + async (request) => { + const requestIdentifiers = await fetchAllRequestIdentifiers( + client, + sombra, + { + requestId: request.id, + skipSombraCheck: true, + }, + ); + return { + ...request, + requestIdentifiers, + }; + }, + { + concurrency: pageLimit, + }, + ); logger.info( colors.magenta(`Pulled ${requestsWithRequestIdentifiers.length} requests`), diff --git a/src/lib/requests/splitDateRange.ts b/src/lib/requests/splitDateRange.ts new file mode 100644 index 00000000..0606ef59 --- /dev/null +++ b/src/lib/requests/splitDateRange.ts @@ -0,0 +1,30 @@ +export interface ChunkedDateRange { + /** Chunk start */ + createdAtAfter: Date; + /** Chunk end */ + createdAtBefore: Date; +} + +/** + * Split a date range into N evenly-spaced chunks. + * + * @param after - Start of the date range + * @param before - End of the date range + * @param chunks - Number of chunks to split into + * @returns Array of date range bounds + */ +export function splitDateRange( + after: Date, + before: Date, + chunks: number, +): ChunkedDateRange[] { + const startMs = after.getTime(); + const endMs = before.getTime(); + const chunkSizeMs = (endMs - startMs) / chunks; + return Array.from({ length: chunks }, (_, i) => ({ + createdAtAfter: new Date(startMs + chunkSizeMs * i), + createdAtBefore: new Date( + i === chunks - 1 ? endMs : startMs + chunkSizeMs * (i + 1), + ), + })); +} diff --git a/src/lib/requests/streamPrivacyRequestsToCsv.ts b/src/lib/requests/streamPrivacyRequestsToCsv.ts index d11d1642..38c64fe5 100644 --- a/src/lib/requests/streamPrivacyRequestsToCsv.ts +++ b/src/lib/requests/streamPrivacyRequestsToCsv.ts @@ -19,37 +19,7 @@ import { formatRequestForCsv, ExportedPrivacyRequest, } from './formatRequestForCsv'; - -interface ChunkedDateRange { - /** Chunk start */ - createdAtAfter: Date; - /** Chunk end */ - createdAtBefore: Date; -} - -/** - * Split a date range into N evenly-spaced chunks. - * - * @param after - Start of the date range - * @param before - End of the date range - * @param chunks - Number of chunks to split into - * @returns Array of date range bounds - */ -function splitDateRange( - after: Date, - before: Date, - chunks: number, -): ChunkedDateRange[] { - const startMs = after.getTime(); - const endMs = before.getTime(); - const chunkSize = (endMs - startMs) / chunks; - return Array.from({ length: chunks }, (_, i) => ({ - createdAtAfter: new Date(startMs + chunkSize * i), - createdAtBefore: new Date( - i === chunks - 1 ? endMs : startMs + chunkSize * (i + 1), - ), - })); -} +import { splitDateRange } from './splitDateRange'; /** * Stream privacy requests directly to CSV files, one file per date-range chunk. From d93ee0cd8ee96afa278a7c114bf4208821d4f759 Mon Sep 17 00:00:00 2001 From: anotherminh Date: Fri, 6 Mar 2026 09:46:36 -0500 Subject: [PATCH 6/7] batch fetch RequestIdentifiers --- src/lib/graphql/fetchAllRequestIdentifiers.ts | 85 +++++++++++++++++++ .../pullManualEnrichmentIdentifiersToCsv.ts | 62 ++++++-------- src/lib/requests/bulkRestartRequests.ts | 13 +-- src/lib/requests/pullPrivacyRequests.ts | 39 +++------ .../requests/streamPrivacyRequestsToCsv.ts | 34 ++++---- 5 files changed, 151 insertions(+), 82 deletions(-) diff --git a/src/lib/graphql/fetchAllRequestIdentifiers.ts b/src/lib/graphql/fetchAllRequestIdentifiers.ts index ba112768..8d4390bf 100644 --- a/src/lib/graphql/fetchAllRequestIdentifiers.ts +++ b/src/lib/graphql/fetchAllRequestIdentifiers.ts @@ -36,6 +36,19 @@ export const RequestIdentifiersResponse = t.type({ pageInfo: PageInfo, }); +const BatchRequestIdentifier = t.type({ + id: t.string, + name: t.string, + value: t.string, + type: valuesOf(IdentifierType), + requestId: t.string, +}); + +const BatchRequestIdentifiersResponse = t.type({ + identifiers: t.array(BatchRequestIdentifier), + pageInfo: PageInfo, +}); + /** * Validate that the Sombra version meets the minimum requirement for * decrypting request identifiers. Call once before bulk-fetching identifiers @@ -139,3 +152,75 @@ export async function fetchAllRequestIdentifiers( return requestIdentifiers; } + +/** + * Fetch request identifiers for multiple requests in a single paginated call. + * Returns a Map keyed by requestId so callers can look up identifiers per request. + * + * @param sombra - Sombra client + * @param options - Options + * @returns Map of requestId to its identifiers + */ +export async function fetchRequestIdentifiersBatch( + sombra: Got, + { + requestIds, + }: { + /** IDs of requests to fetch identifiers for */ + requestIds: string[]; + }, +): Promise> { + const result = new Map(); + + if (requestIds.length === 0) { + return result; + } + + // Ensure every requested ID has an entry even if Sombra returns nothing for it + for (const id of requestIds) { + result.set(id, []); + } + + let cursor: string | undefined; + let shouldContinue = true; + + while (shouldContinue) { + let response: unknown; + try { + response = await sombra + .post('v1/request-identifiers', { + json: { + first: PAGE_SIZE, + after: cursor ?? undefined, + requestIds, + }, + }) + .json(); + } catch (err) { + throw new Error( + `Failed to fetch request identifiers: ${ + err?.response?.body || err?.message + }`, + ); + } + + const { identifiers: nodes, pageInfo } = decodeCodec( + BatchRequestIdentifiersResponse, + response, + ); + + for (const { requestId, ...identifier } of nodes) { + const list = result.get(requestId); + if (list) { + list.push(identifier); + } else { + result.set(requestId, [identifier]); + } + } + + cursor = pageInfo.endCursor ?? undefined; + shouldContinue = pageInfo.hasNextPage; + } + + return result; +} diff --git a/src/lib/manual-enrichment/pullManualEnrichmentIdentifiersToCsv.ts b/src/lib/manual-enrichment/pullManualEnrichmentIdentifiersToCsv.ts index 1076994d..17dbf772 100644 --- a/src/lib/manual-enrichment/pullManualEnrichmentIdentifiersToCsv.ts +++ b/src/lib/manual-enrichment/pullManualEnrichmentIdentifiersToCsv.ts @@ -11,7 +11,7 @@ import { buildTranscendGraphQLClient, createSombraGotInstance, fetchAllRequestEnrichers, - fetchAllRequestIdentifiers, + fetchRequestIdentifiersBatch, fetchAllRequests, validateSombraVersion, } from '../graphql'; @@ -71,45 +71,37 @@ export async function pullManualEnrichmentIdentifiersToCsv({ await validateSombraVersion(client); - // Requests to save - const savedRequests: PrivacyRequestWithIdentifiers[] = []; - - // Filter down requests to what is needed - await map( + // Fetch enrichers for all requests in parallel + const requestsWithEnrichers = await map( allRequests, - async (request) => { - // Fetch enrichers - const requestEnrichers = await fetchAllRequestEnrichers(client, { + async (request) => ({ + request, + requestEnrichers: await fetchAllRequestEnrichers(client, { requestId: request.id, - }); - - // Check if manual enrichment exists for that request - const hasManualEnrichment = requestEnrichers.filter( - ({ status }) => status === 'ACTION_REQUIRED', - ); + }), + }), + { concurrency }, + ); - // Save request to queue - if (hasManualEnrichment) { - const requestIdentifiers = await fetchAllRequestIdentifiers( - client, - sombra, - { - requestId: request.id, - skipSombraCheck: true, - }, - ); - savedRequests.push({ - ...request, - requestIdentifiers, - requestEnrichers, - }); - } - }, - { - concurrency, - }, + // Filter to requests that have manual enrichment + const manualEnrichmentRequests = requestsWithEnrichers.filter( + ({ requestEnrichers }) => + requestEnrichers.filter(({ status }) => status === 'ACTION_REQUIRED') + .length > 0, ); + // Batch-fetch identifiers for all qualifying requests at once + const identifiersByRequest = await fetchRequestIdentifiersBatch(sombra, { + requestIds: manualEnrichmentRequests.map(({ request }) => request.id), + }); + + const savedRequests: PrivacyRequestWithIdentifiers[] = + manualEnrichmentRequests.map(({ request, requestEnrichers }) => ({ + ...request, + requestIdentifiers: identifiersByRequest.get(request.id) ?? [], + requestEnrichers, + })); + const data = savedRequests.map( ({ attributeValues, diff --git a/src/lib/requests/bulkRestartRequests.ts b/src/lib/requests/bulkRestartRequests.ts index ff5bce0e..f8c0b613 100644 --- a/src/lib/requests/bulkRestartRequests.ts +++ b/src/lib/requests/bulkRestartRequests.ts @@ -8,9 +8,10 @@ import { difference } from 'lodash-es'; import { join, resolve } from 'node:path'; import { DEFAULT_TRANSCEND_API } from '../../constants'; import { + RequestIdentifier, buildTranscendGraphQLClient, createSombraGotInstance, - fetchAllRequestIdentifiers, + fetchRequestIdentifiersBatch, fetchAllRequests, validateSombraVersion, } from '../graphql'; @@ -163,8 +164,12 @@ export async function bulkRestartRequests({ } } + let identifiersByRequest: Map | undefined; if (copyIdentifiers) { await validateSombraVersion(client); + identifiersByRequest = await fetchRequestIdentifiersBatch(sombra, { + requestIds: requests.map((r) => r.id), + }); } // Map over the requests @@ -174,12 +179,8 @@ export async function bulkRestartRequests({ requests, async (request, ind) => { try { - // Pull the request identifiers const requestIdentifiers = copyIdentifiers - ? await fetchAllRequestIdentifiers(client, sombra, { - requestId: request.id, - skipSombraCheck: true, - }) + ? identifiersByRequest!.get(request.id) ?? [] : []; // Make the GraphQL request to restart the request diff --git a/src/lib/requests/pullPrivacyRequests.ts b/src/lib/requests/pullPrivacyRequests.ts index ba0ad586..2f98ebfc 100644 --- a/src/lib/requests/pullPrivacyRequests.ts +++ b/src/lib/requests/pullPrivacyRequests.ts @@ -7,7 +7,7 @@ import { RequestIdentifier, buildTranscendGraphQLClient, createSombraGotInstance, - fetchAllRequestIdentifiers, + fetchRequestIdentifiersBatch, fetchAllRequests, validateSombraVersion, } from '../graphql'; @@ -31,7 +31,6 @@ export async function pullPrivacyRequests({ actions = [], statuses = [], identifierSearch, - pageLimit = 100, concurrency = 1, transcendUrl = DEFAULT_TRANSCEND_API, createdAtBefore, @@ -134,31 +133,21 @@ export async function pullPrivacyRequests({ } // Fetch the request identifiers for those requests - const requestsWithRequestIdentifiers = skipRequestIdentifiers - ? requests.map((request) => ({ + let requestsWithRequestIdentifiers: ExportedPrivacyRequest[]; + if (skipRequestIdentifiers) { + requestsWithRequestIdentifiers = requests.map((request) => ({ ...request, requestIdentifiers: [] as RequestIdentifier[], - })) - : await map( - requests, - async (request) => { - const requestIdentifiers = await fetchAllRequestIdentifiers( - client, - sombra, - { - requestId: request.id, - skipSombraCheck: true, - }, - ); - return { - ...request, - requestIdentifiers, - }; - }, - { - concurrency: pageLimit, - }, - ); + })); + } else { + const identifiersByRequest = await fetchRequestIdentifiersBatch(sombra, { + requestIds: requests.map((r) => r.id), + }); + requestsWithRequestIdentifiers = requests.map((request) => ({ + ...request, + requestIdentifiers: identifiersByRequest.get(request.id) ?? [], + })); + } logger.info( colors.magenta(`Pulled ${requestsWithRequestIdentifiers.length} requests`), diff --git a/src/lib/requests/streamPrivacyRequestsToCsv.ts b/src/lib/requests/streamPrivacyRequestsToCsv.ts index 38c64fe5..839f526e 100644 --- a/src/lib/requests/streamPrivacyRequestsToCsv.ts +++ b/src/lib/requests/streamPrivacyRequestsToCsv.ts @@ -8,7 +8,7 @@ import { DEFAULT_TRANSCEND_API } from '../../constants'; import { buildTranscendGraphQLClient, createSombraGotInstance, - fetchAllRequestIdentifiers, + fetchRequestIdentifiersBatch, fetchAllRequests, fetchRequestsTotalCount, validateSombraVersion, @@ -36,7 +36,6 @@ export async function streamPrivacyRequestsToCsv({ statuses = [], identifierSearch, concurrency = 1, - pageLimit = 100, transcendUrl = DEFAULT_TRANSCEND_API, createdAtBefore, createdAtAfter, @@ -194,20 +193,23 @@ export async function streamPrivacyRequestsToCsv({ if (nodes.length === 0) return; // Optionally enrich each request with its identifiers - const enriched: ExportedPrivacyRequest[] = skipRequestIdentifiers - ? nodes.map((n) => ({ ...n, requestIdentifiers: [] })) - : await map( - nodes, - async (n) => ({ - ...n, - requestIdentifiers: await fetchAllRequestIdentifiers( - client, - sombra!, - { requestId: n.id, skipSombraCheck: true }, - ), - }), - { concurrency: pageLimit }, - ); + let enriched: ExportedPrivacyRequest[]; + if (skipRequestIdentifiers) { + enriched = nodes.map((n) => ({ + ...n, + requestIdentifiers: [], + })); + } else { + const identifiersByRequest = + await fetchRequestIdentifiersBatch(sombra!, { + requestIds: nodes.map((n) => n.id), + }); + enriched = nodes.map((n) => ({ + ...n, + requestIdentifiers: + identifiersByRequest.get(n.id) ?? [], + })); + } const rows: Record[] = enriched.map(formatRequestForCsv); From b86c6974856e5d4cc46494b706a440e9f85a5956 Mon Sep 17 00:00:00 2001 From: iamtheluckyest Date: Mon, 16 Mar 2026 11:20:31 -0400 Subject: [PATCH 7/7] refactors --- src/lib/graphql/fetchAllRequestIdentifiers.ts | 9 +++-- src/lib/requests/pullPrivacyRequests.ts | 38 +++++++++---------- .../requests/streamPrivacyRequestsToCsv.ts | 22 ++++------- 3 files changed, 29 insertions(+), 40 deletions(-) diff --git a/src/lib/graphql/fetchAllRequestIdentifiers.ts b/src/lib/graphql/fetchAllRequestIdentifiers.ts index 8d4390bf..cd58e30a 100644 --- a/src/lib/graphql/fetchAllRequestIdentifiers.ts +++ b/src/lib/graphql/fetchAllRequestIdentifiers.ts @@ -103,7 +103,7 @@ export async function fetchAllRequestIdentifiers( }, ): Promise { const requestIdentifiers: RequestIdentifier[] = []; - let cursor: string | undefined; + let endCursor: string | undefined; let shouldContinue = true; if (!skipSombraCheck) { @@ -127,14 +127,15 @@ export async function fetchAllRequestIdentifiers( }>('v1/request-identifiers', { json: { first: PAGE_SIZE, - after: cursor ?? undefined, + after: endCursor ?? undefined, requestId, }, }) .json(); } catch (err) { throw new Error( - `Failed to fetch request identifiers: ${err?.response?.body || err?.message + `Failed to fetch request identifiers: ${ + err?.response?.body || err?.message }`, ); } @@ -146,7 +147,7 @@ export async function fetchAllRequestIdentifiers( requestIdentifiers.push(...nodes); - cursor = pageInfo.endCursor ?? undefined; + endCursor = pageInfo.endCursor ?? undefined; shouldContinue = pageInfo.hasNextPage; } diff --git a/src/lib/requests/pullPrivacyRequests.ts b/src/lib/requests/pullPrivacyRequests.ts index 2f98ebfc..01ede4c4 100644 --- a/src/lib/requests/pullPrivacyRequests.ts +++ b/src/lib/requests/pullPrivacyRequests.ts @@ -4,7 +4,6 @@ import colors from 'colors'; import { DEFAULT_TRANSCEND_API } from '../../constants'; import { - RequestIdentifier, buildTranscendGraphQLClient, createSombraGotInstance, fetchRequestIdentifiersBatch, @@ -83,14 +82,16 @@ export async function pullPrivacyRequests({ dateRange += ` before ${createdAtBefore.toISOString()}`; } if (createdAtAfter) { - dateRange += `${dateRange ? ', and' : '' - } after ${createdAtAfter.toISOString()}`; + dateRange += `${ + dateRange ? ', and' : '' + } after ${createdAtAfter.toISOString()}`; } logger.info( colors.magenta( - `${actions.length > 0 - ? `Pulling requests of type "${actions.join('" , "')}"` - : 'Pulling all requests' + `${ + actions.length > 0 + ? `Pulling requests of type "${actions.join('" , "')}"` + : 'Pulling all requests' }${dateRange}`, ), ); @@ -133,21 +134,16 @@ export async function pullPrivacyRequests({ } // Fetch the request identifiers for those requests - let requestsWithRequestIdentifiers: ExportedPrivacyRequest[]; - if (skipRequestIdentifiers) { - requestsWithRequestIdentifiers = requests.map((request) => ({ - ...request, - requestIdentifiers: [] as RequestIdentifier[], - })); - } else { - const identifiersByRequest = await fetchRequestIdentifiersBatch(sombra, { - requestIds: requests.map((r) => r.id), - }); - requestsWithRequestIdentifiers = requests.map((request) => ({ - ...request, - requestIdentifiers: identifiersByRequest.get(request.id) ?? [], - })); - } + const identifiersByRequest = skipRequestIdentifiers + ? new Map() + : await fetchRequestIdentifiersBatch(sombra, { + requestIds: requests.map((r) => r.id), + }); + + const requestsWithRequestIdentifiers = requests.map((request) => ({ + ...request, + requestIdentifiers: identifiersByRequest.get(request.id) ?? [], + })); logger.info( colors.magenta(`Pulled ${requestsWithRequestIdentifiers.length} requests`), diff --git a/src/lib/requests/streamPrivacyRequestsToCsv.ts b/src/lib/requests/streamPrivacyRequestsToCsv.ts index 839f526e..410e02ab 100644 --- a/src/lib/requests/streamPrivacyRequestsToCsv.ts +++ b/src/lib/requests/streamPrivacyRequestsToCsv.ts @@ -193,23 +193,15 @@ export async function streamPrivacyRequestsToCsv({ if (nodes.length === 0) return; // Optionally enrich each request with its identifiers - let enriched: ExportedPrivacyRequest[]; - if (skipRequestIdentifiers) { - enriched = nodes.map((n) => ({ - ...n, - requestIdentifiers: [], - })); - } else { - const identifiersByRequest = - await fetchRequestIdentifiersBatch(sombra!, { + const identifiersByRequest = skipRequestIdentifiers + ? new Map() + : await fetchRequestIdentifiersBatch(sombra!, { requestIds: nodes.map((n) => n.id), }); - enriched = nodes.map((n) => ({ - ...n, - requestIdentifiers: - identifiersByRequest.get(n.id) ?? [], - })); - } + const enriched: ExportedPrivacyRequest[] = nodes.map((n) => ({ + ...n, + requestIdentifiers: identifiersByRequest.get(n.id) ?? [], + })); const rows: Record[] = enriched.map(formatRequestForCsv);