diff --git a/packages/cubejs-base-driver/src/storage-fs/aws.fs.ts b/packages/cubejs-base-driver/src/storage-fs/aws.fs.ts index 5132057599404..6e121c87f08c0 100644 --- a/packages/cubejs-base-driver/src/storage-fs/aws.fs.ts +++ b/packages/cubejs-base-driver/src/storage-fs/aws.fs.ts @@ -3,6 +3,42 @@ import { S3, GetObjectCommand, S3ClientConfig } from '@aws-sdk/client-s3'; export type S3StorageClientConfig = S3ClientConfig; +/** + * Returns a copy of the S3 client config with unusable static credentials and + * a blank region removed, so the AWS SDK falls back to its default resolution. + * + * When an export bucket is configured for OIDC / workload identity there are no + * static keys, yet some drivers still pass a `{ accessKeyId: '', secretAccessKey: '' }` + * object. The SDK would send those empty strings verbatim and the request fails + * with `AuthorizationHeaderMalformed: ... a non-empty Access Key (AKID) must be + * provided`. Dropping the empty credentials lets the SDK use its default provider + * chain (env vars, web identity token file `AWS_WEB_IDENTITY_TOKEN_FILE`, IRSA, + * etc.). Likewise a blank region would short-circuit region resolution from + * `AWS_REGION` / `AWS_DEFAULT_REGION`. + * + * Credential *providers* (functions, e.g. `fromTemporaryCredentials`) and + * fully-populated static credentials are left untouched. + */ +export function normalizeS3ClientConfig(clientOptions: S3StorageClientConfig): S3StorageClientConfig { + const config: S3StorageClientConfig = { ...clientOptions }; + const { credentials } = config; + + if ( + credentials && + typeof credentials === 'object' && + 'accessKeyId' in credentials && + (!credentials.accessKeyId || !credentials.secretAccessKey) + ) { + delete config.credentials; + } + + if (typeof config.region === 'string' && config.region.trim() === '') { + delete config.region; + } + + return config; +} + /** * Returns an array of signed AWS S3 URLs of the unloaded csv files. */ @@ -11,7 +47,7 @@ export async function extractUnloadedFilesFromS3( bucketName: string, prefix: string ): Promise { - const storage = new S3(clientOptions); + const storage = new S3(normalizeS3ClientConfig(clientOptions)); // It looks that different driver configurations use different formats // for the bucket - some expect only names, some - full url-like names. // So we unify this. diff --git a/packages/cubejs-base-driver/src/storage-fs/gcs.fs.ts b/packages/cubejs-base-driver/src/storage-fs/gcs.fs.ts index 2da238663e5dc..d2e2f4e4770ee 100644 --- a/packages/cubejs-base-driver/src/storage-fs/gcs.fs.ts +++ b/packages/cubejs-base-driver/src/storage-fs/gcs.fs.ts @@ -4,6 +4,23 @@ export type GoogleStorageClientConfig = { credentials: any, }; +/** + * Whether the provided GCS credentials are usable. Empty string / undefined + * (no credentials configured, e.g. OIDC / workload identity) and an empty + * object are treated as absent so the Google SDK falls back to Application + * Default Credentials (which honors `GOOGLE_APPLICATION_CREDENTIALS`, including + * workload-identity-federation `external_account` config files). + */ +export function hasGCSCredentials(credentials: any): boolean { + if (!credentials) { + return false; + } + if (typeof credentials === 'object' && Object.keys(credentials).length === 0) { + return false; + } + return true; +} + /** * Returns an array of signed GCS URLs of the unloaded csv files. */ @@ -13,7 +30,7 @@ export async function extractFilesFromGCS( tableName: string ): Promise { const storage = new Storage( - gcsConfig.credentials + hasGCSCredentials(gcsConfig.credentials) ? { credentials: gcsConfig.credentials, projectId: gcsConfig.credentials.project_id } : undefined ); diff --git a/packages/cubejs-base-driver/test/unit/storage-fs.test.ts b/packages/cubejs-base-driver/test/unit/storage-fs.test.ts new file mode 100644 index 0000000000000..d00ec4d411615 --- /dev/null +++ b/packages/cubejs-base-driver/test/unit/storage-fs.test.ts @@ -0,0 +1,94 @@ +import { S3 } from '@aws-sdk/client-s3'; +import { Storage } from '@google-cloud/storage'; +import { DefaultAzureCredential } from '@azure/identity'; + +import { normalizeS3ClientConfig } from '../../src/storage-fs/aws.fs'; +import { hasGCSCredentials } from '../../src/storage-fs/gcs.fs'; + +describe('storage-fs credential normalization', () => { + describe('normalizeS3ClientConfig', () => { + test('drops static credentials when both key and secret are blank', () => { + const config = normalizeS3ClientConfig({ + credentials: { accessKeyId: '', secretAccessKey: '' }, + region: 'us-east-1', + }); + + expect(config.credentials).toBeUndefined(); + expect(config.region).toBe('us-east-1'); + }); + + test('drops static credentials when only one of key/secret is set', () => { + expect( + normalizeS3ClientConfig({ credentials: { accessKeyId: 'AKIA', secretAccessKey: '' } }).credentials + ).toBeUndefined(); + expect( + normalizeS3ClientConfig({ credentials: { accessKeyId: '', secretAccessKey: 'secret' } }).credentials + ).toBeUndefined(); + }); + + test('keeps fully-populated static credentials', () => { + const credentials = { accessKeyId: 'AKIA', secretAccessKey: 'secret' }; + expect(normalizeS3ClientConfig({ credentials }).credentials).toEqual(credentials); + }); + + test('keeps a credential provider function untouched', () => { + // e.g. fromTemporaryCredentials / fromWebToken — a function, resolved lazily. + const provider = async () => ({ accessKeyId: 'AKIA', secretAccessKey: 'secret' }); + expect(normalizeS3ClientConfig({ credentials: provider }).credentials).toBe(provider); + }); + + test('drops a blank region', () => { + expect(normalizeS3ClientConfig({ region: '' }).region).toBeUndefined(); + expect(normalizeS3ClientConfig({ region: ' ' }).region).toBeUndefined(); + expect(normalizeS3ClientConfig({ region: 'eu-west-1' }).region).toBe('eu-west-1'); + }); + + test('does not mutate the input config', () => { + const input = { credentials: { accessKeyId: '', secretAccessKey: '' }, region: '' }; + normalizeS3ClientConfig(input); + expect(input.credentials).toEqual({ accessKeyId: '', secretAccessKey: '' }); + expect(input.region).toBe(''); + }); + + // This is the actual reproduction of the Freshworks / CUB-3000 failure mode: + // the AWS SDK must accept a config with no credentials so it can resolve the + // default provider chain (web identity token file) instead of throwing. + test('the S3 client constructs without throwing when credentials are omitted', () => { + expect(() => new S3(normalizeS3ClientConfig({ + credentials: { accessKeyId: '', secretAccessKey: '' }, + region: 'us-east-1', + }))).not.toThrow(); + }); + }); + + describe('hasGCSCredentials', () => { + test('treats empty/undefined/empty-object as absent', () => { + expect(hasGCSCredentials(undefined)).toBe(false); + expect(hasGCSCredentials('')).toBe(false); + expect(hasGCSCredentials({})).toBe(false); + }); + + test('treats a non-empty credentials object as present', () => { + expect(hasGCSCredentials({ project_id: 'p', client_email: 'e' })).toBe(true); + }); + + test('the GCS client constructs without throwing when credentials are absent', () => { + // `new Storage(undefined)` is what extractFilesFromGCS does when no + // credentials are configured; it must fall back to ADC, not throw. + expect(() => new Storage(undefined)).not.toThrow(); + }); + }); + + describe('azure DefaultAzureCredential', () => { + test('constructs without throwing when no static key/secret is provided', () => { + // extractFilesFromAzure falls through to DefaultAzureCredential when only + // clientId/tenantId (or nothing) are provided; it must construct so the + // federated token file (AZURE_FEDERATED_TOKEN_FILE) can be resolved. + // Options are built as a variable to mirror extractFilesFromAzure (and to + // pass clientId/tenantId, which the SDK reads at runtime). + const opts = { clientId: 'c', tenantId: 't', tokenFilePath: undefined }; + expect(() => new DefaultAzureCredential({})).not.toThrow(); + expect(() => new DefaultAzureCredential(opts)).not.toThrow(); + }); + }); +}); diff --git a/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts b/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts index c1f5da3c70a30..c40d6c08d3136 100644 --- a/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts +++ b/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts @@ -837,8 +837,18 @@ export class DatabricksDriver extends JDBCDriver { const azureBucketPath = `${bucketName}/${username}`; const exportPrefix = path ? `${path}/${tableName}` : tableName; + // Pass `undefined` (not empty strings) for unset values so the Azure SDK + // falls back to `DefaultAzureCredential` — which resolves a federated + // (workload identity) token from `AZURE_FEDERATED_TOKEN_FILE` together + // with `AZURE_CLIENT_ID` / `AZURE_TENANT_ID` when no static key/secret + // is configured (OIDC). return this.extractFilesFromAzure( - { azureKey, clientId, tenantId, clientSecret }, + { + azureKey: azureKey || undefined, + clientId: clientId || undefined, + tenantId: tenantId || undefined, + clientSecret: clientSecret || undefined, + }, // Databricks uses different bucket address form, so we need to transform it // to the one understandable by extractFilesFromAzure implementation azureBucketPath, @@ -848,13 +858,24 @@ export class DatabricksDriver extends JDBCDriver { const { bucketName, path } = this.parseBucketUrl(this.config.exportBucket); const exportPrefix = path ? `${path}/${tableName}` : tableName; + // Only pass static credentials when both key and secret are configured. + // Otherwise omit them (and a blank region) so the AWS SDK resolves + // credentials from its default provider chain — e.g. the web identity + // token file (`AWS_WEB_IDENTITY_TOKEN_FILE`) used for OIDC / workload + // identity. Passing empty strings makes S3 fail with + // `AuthorizationHeaderMalformed`. + const credentials = + this.config.awsKey && this.config.awsSecret + ? { + accessKeyId: this.config.awsKey, + secretAccessKey: this.config.awsSecret, + } + : undefined; + return this.extractUnloadedFilesFromS3( { - credentials: { - accessKeyId: this.config.awsKey || '', - secretAccessKey: this.config.awsSecret || '', - }, - region: this.config.awsRegion || '', + ...(credentials ? { credentials } : {}), + ...(this.config.awsRegion ? { region: this.config.awsRegion } : {}), }, bucketName, exportPrefix, @@ -863,8 +884,11 @@ export class DatabricksDriver extends JDBCDriver { const { bucketName, path } = this.parseBucketUrl(this.config.exportBucket); const exportPrefix = path ? `${path}/${tableName}` : tableName; + // Omit credentials when none are configured so the Google SDK falls back + // to Application Default Credentials (honors `GOOGLE_APPLICATION_CREDENTIALS`, + // including workload-identity-federation `external_account` configs). return this.extractFilesFromGCS( - { credentials: this.config.gcsCredentials }, + { credentials: this.config.gcsCredentials || undefined }, bucketName, exportPrefix, ); diff --git a/packages/cubejs-databricks-jdbc-driver/test/DatabricksDriver.test.ts b/packages/cubejs-databricks-jdbc-driver/test/DatabricksDriver.test.ts index 275924a184307..a251e396d41e4 100644 --- a/packages/cubejs-databricks-jdbc-driver/test/DatabricksDriver.test.ts +++ b/packages/cubejs-databricks-jdbc-driver/test/DatabricksDriver.test.ts @@ -7,6 +7,23 @@ jest.mock('@azure/storage-blob', () => ({ generateBlobSASQueryParameters: jest.fn().mockReturnValue('test') })); +// Capture the config the S3 client is constructed with so we can assert that +// empty OIDC credentials are not forwarded (which would yield +// `AuthorizationHeaderMalformed`) — see CUB-3000. +const s3ClientConfigs: any[] = []; +jest.mock('@aws-sdk/client-s3', () => ({ + S3: jest.fn().mockImplementation((config: any) => { + s3ClientConfigs.push(config); + return { + listObjectsV2: jest.fn().mockResolvedValue({ Contents: [{ Key: 'product/part-0.csv' }] }), + }; + }), + GetObjectCommand: jest.fn().mockImplementation((input: any) => ({ input })), +})); +jest.mock('@aws-sdk/s3-request-presigner', () => ({ + getSignedUrl: jest.fn().mockResolvedValue('https://signed.example/product/part-0.csv'), +})); + jest.spyOn(ContainerClient.prototype, 'listBlobsFlat').mockImplementation( jest.fn().mockReturnValue([{name: 'product.csv/test.csv'}]) ); @@ -55,11 +72,62 @@ describe('DatabricksDriver', () => { test('should get signed URLs of unloaded csv files by azure client secret', async () => { process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_KEY=''; - databricksDriver = new DatabricksDriver(); + databricksDriver = new DatabricksDriver(); databricksDriver['unloadWithSql'] = mockUnloadWithSql; const result = await databricksDriver.unload(mockTableName, mockOptions); expect(mockUnloadWithSql).toHaveBeenCalledWith(mockTableName, mockSql, mockParams); expect(result.csvFile).toBeTruthy(); }); + + describe('s3 export bucket', () => { + beforeEach(() => { + s3ClientConfigs.length = 0; + process.env.CUBEJS_DB_EXPORT_BUCKET_TYPE = 's3'; + process.env.CUBEJS_DB_EXPORT_BUCKET = 's3://cube-export'; + process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_REGION = 'us-east-1'; + }); + + afterAll(() => { + // Restore the azure bucket config the other tests rely on. + process.env.CUBEJS_DB_EXPORT_BUCKET_TYPE = 'azure'; + process.env.CUBEJS_DB_EXPORT_BUCKET = 'wasbs://cube-export@mock.blob.core.windows.net'; + delete process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_KEY; + delete process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET; + delete process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_REGION; + }); + + // CUB-3000: OIDC / workload identity — no static keys configured. + test('omits credentials when no static keys are set so the SDK uses the default chain', async () => { + delete process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_KEY; + delete process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET; + + databricksDriver = new DatabricksDriver(); + databricksDriver['unloadWithSql'] = mockUnloadWithSql; + + const result = await databricksDriver.unload(mockTableName, mockOptions); + + expect(result.csvFile).toBeTruthy(); + expect(s3ClientConfigs).toHaveLength(1); + expect(s3ClientConfigs[0].credentials).toBeUndefined(); + expect(s3ClientConfigs[0].region).toBe('us-east-1'); + }); + + test('passes static credentials through when configured', async () => { + process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_KEY = 'AKIAEXAMPLE'; + process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET = 'secretexample'; + + databricksDriver = new DatabricksDriver(); + databricksDriver['unloadWithSql'] = mockUnloadWithSql; + + const result = await databricksDriver.unload(mockTableName, mockOptions); + + expect(result.csvFile).toBeTruthy(); + expect(s3ClientConfigs).toHaveLength(1); + expect(s3ClientConfigs[0].credentials).toEqual({ + accessKeyId: 'AKIAEXAMPLE', + secretAccessKey: 'secretexample', + }); + }); + }); });