Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion packages/cubejs-base-driver/src/storage-fs/aws.fs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,42 @@ import { S3, GetObjectCommand, S3ClientConfig } from '@aws-sdk/client-s3';

export type S3StorageClientConfig = S3ClientConfig;

/**
* Returns a copy of the S3 client config with unusable static credentials and
* a blank region removed, so the AWS SDK falls back to its default resolution.
*
* When an export bucket is configured for OIDC / workload identity there are no
* static keys, yet some drivers still pass a `{ accessKeyId: '', secretAccessKey: '' }`
* object. The SDK would send those empty strings verbatim and the request fails
* with `AuthorizationHeaderMalformed: ... a non-empty Access Key (AKID) must be
* provided`. Dropping the empty credentials lets the SDK use its default provider
* chain (env vars, web identity token file `AWS_WEB_IDENTITY_TOKEN_FILE`, IRSA,
* etc.). Likewise a blank region would short-circuit region resolution from
* `AWS_REGION` / `AWS_DEFAULT_REGION`.
*
* Credential *providers* (functions, e.g. `fromTemporaryCredentials`) and
* fully-populated static credentials are left untouched.
*/
export function normalizeS3ClientConfig(clientOptions: S3StorageClientConfig): S3StorageClientConfig {
const config: S3StorageClientConfig = { ...clientOptions };
const { credentials } = config;

if (
credentials &&
typeof credentials === 'object' &&
'accessKeyId' in credentials &&
(!credentials.accessKeyId || !credentials.secretAccessKey)
) {
delete config.credentials;
}

if (typeof config.region === 'string' && config.region.trim() === '') {
delete config.region;
}

return config;
}

/**
* Returns an array of signed AWS S3 URLs of the unloaded csv files.
*/
Expand All @@ -11,7 +47,7 @@ export async function extractUnloadedFilesFromS3(
bucketName: string,
prefix: string
): Promise<string[]> {
const storage = new S3(clientOptions);
const storage = new S3(normalizeS3ClientConfig(clientOptions));
// It looks that different driver configurations use different formats
// for the bucket - some expect only names, some - full url-like names.
// So we unify this.
Expand Down
19 changes: 18 additions & 1 deletion packages/cubejs-base-driver/src/storage-fs/gcs.fs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,23 @@ export type GoogleStorageClientConfig = {
credentials: any,
};

/**
* Whether the provided GCS credentials are usable. Empty string / undefined
* (no credentials configured, e.g. OIDC / workload identity) and an empty
* object are treated as absent so the Google SDK falls back to Application
* Default Credentials (which honors `GOOGLE_APPLICATION_CREDENTIALS`, including
* workload-identity-federation `external_account` config files).
*/
export function hasGCSCredentials(credentials: any): boolean {
if (!credentials) {
return false;
}
if (typeof credentials === 'object' && Object.keys(credentials).length === 0) {
return false;
}
return true;
}

/**
* Returns an array of signed GCS URLs of the unloaded csv files.
*/
Expand All @@ -13,7 +30,7 @@ export async function extractFilesFromGCS(
tableName: string
): Promise<string[]> {
const storage = new Storage(
gcsConfig.credentials
hasGCSCredentials(gcsConfig.credentials)
? { credentials: gcsConfig.credentials, projectId: gcsConfig.credentials.project_id }
: undefined
);
Expand Down
94 changes: 94 additions & 0 deletions packages/cubejs-base-driver/test/unit/storage-fs.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import { S3 } from '@aws-sdk/client-s3';
import { Storage } from '@google-cloud/storage';
import { DefaultAzureCredential } from '@azure/identity';

import { normalizeS3ClientConfig } from '../../src/storage-fs/aws.fs';
import { hasGCSCredentials } from '../../src/storage-fs/gcs.fs';

describe('storage-fs credential normalization', () => {
describe('normalizeS3ClientConfig', () => {
test('drops static credentials when both key and secret are blank', () => {
const config = normalizeS3ClientConfig({
credentials: { accessKeyId: '', secretAccessKey: '' },
region: 'us-east-1',
});

expect(config.credentials).toBeUndefined();
expect(config.region).toBe('us-east-1');
});

test('drops static credentials when only one of key/secret is set', () => {
expect(
normalizeS3ClientConfig({ credentials: { accessKeyId: 'AKIA', secretAccessKey: '' } }).credentials
).toBeUndefined();
expect(
normalizeS3ClientConfig({ credentials: { accessKeyId: '', secretAccessKey: 'secret' } }).credentials
).toBeUndefined();
});

test('keeps fully-populated static credentials', () => {
const credentials = { accessKeyId: 'AKIA', secretAccessKey: 'secret' };
expect(normalizeS3ClientConfig({ credentials }).credentials).toEqual(credentials);
});

test('keeps a credential provider function untouched', () => {
// e.g. fromTemporaryCredentials / fromWebToken — a function, resolved lazily.
const provider = async () => ({ accessKeyId: 'AKIA', secretAccessKey: 'secret' });
expect(normalizeS3ClientConfig({ credentials: provider }).credentials).toBe(provider);
});

test('drops a blank region', () => {
expect(normalizeS3ClientConfig({ region: '' }).region).toBeUndefined();
expect(normalizeS3ClientConfig({ region: ' ' }).region).toBeUndefined();
expect(normalizeS3ClientConfig({ region: 'eu-west-1' }).region).toBe('eu-west-1');
});

test('does not mutate the input config', () => {
const input = { credentials: { accessKeyId: '', secretAccessKey: '' }, region: '' };
normalizeS3ClientConfig(input);
expect(input.credentials).toEqual({ accessKeyId: '', secretAccessKey: '' });
expect(input.region).toBe('');
});

// This is the actual reproduction of the Freshworks / CUB-3000 failure mode:
// the AWS SDK must accept a config with no credentials so it can resolve the
// default provider chain (web identity token file) instead of throwing.
test('the S3 client constructs without throwing when credentials are omitted', () => {
expect(() => new S3(normalizeS3ClientConfig({
credentials: { accessKeyId: '', secretAccessKey: '' },
region: 'us-east-1',
}))).not.toThrow();
});
});

describe('hasGCSCredentials', () => {
test('treats empty/undefined/empty-object as absent', () => {
expect(hasGCSCredentials(undefined)).toBe(false);
expect(hasGCSCredentials('')).toBe(false);
expect(hasGCSCredentials({})).toBe(false);
});

test('treats a non-empty credentials object as present', () => {
expect(hasGCSCredentials({ project_id: 'p', client_email: 'e' })).toBe(true);
});

test('the GCS client constructs without throwing when credentials are absent', () => {
// `new Storage(undefined)` is what extractFilesFromGCS does when no
// credentials are configured; it must fall back to ADC, not throw.
expect(() => new Storage(undefined)).not.toThrow();
});
});

describe('azure DefaultAzureCredential', () => {
test('constructs without throwing when no static key/secret is provided', () => {
// extractFilesFromAzure falls through to DefaultAzureCredential when only
// clientId/tenantId (or nothing) are provided; it must construct so the
// federated token file (AZURE_FEDERATED_TOKEN_FILE) can be resolved.
// Options are built as a variable to mirror extractFilesFromAzure (and to
// pass clientId/tenantId, which the SDK reads at runtime).
const opts = { clientId: 'c', tenantId: 't', tokenFilePath: undefined };
expect(() => new DefaultAzureCredential({})).not.toThrow();
expect(() => new DefaultAzureCredential(opts)).not.toThrow();
});
});
});
38 changes: 31 additions & 7 deletions packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -837,8 +837,18 @@ export class DatabricksDriver extends JDBCDriver {
const azureBucketPath = `${bucketName}/${username}`;
const exportPrefix = path ? `${path}/${tableName}` : tableName;

// Pass `undefined` (not empty strings) for unset values so the Azure SDK
// falls back to `DefaultAzureCredential` — which resolves a federated
// (workload identity) token from `AZURE_FEDERATED_TOKEN_FILE` together
// with `AZURE_CLIENT_ID` / `AZURE_TENANT_ID` when no static key/secret
// is configured (OIDC).
return this.extractFilesFromAzure(
{ azureKey, clientId, tenantId, clientSecret },
{
azureKey: azureKey || undefined,
clientId: clientId || undefined,
tenantId: tenantId || undefined,
clientSecret: clientSecret || undefined,
},
// Databricks uses different bucket address form, so we need to transform it
// to the one understandable by extractFilesFromAzure implementation
azureBucketPath,
Expand All @@ -848,13 +858,24 @@ export class DatabricksDriver extends JDBCDriver {
const { bucketName, path } = this.parseBucketUrl(this.config.exportBucket);
const exportPrefix = path ? `${path}/${tableName}` : tableName;

// Only pass static credentials when both key and secret are configured.
// Otherwise omit them (and a blank region) so the AWS SDK resolves
// credentials from its default provider chain — e.g. the web identity
// token file (`AWS_WEB_IDENTITY_TOKEN_FILE`) used for OIDC / workload
// identity. Passing empty strings makes S3 fail with
// `AuthorizationHeaderMalformed`.
const credentials =
this.config.awsKey && this.config.awsSecret
? {
accessKeyId: this.config.awsKey,
secretAccessKey: this.config.awsSecret,
}
: undefined;

return this.extractUnloadedFilesFromS3(
{
credentials: {
accessKeyId: this.config.awsKey || '',
secretAccessKey: this.config.awsSecret || '',
},
region: this.config.awsRegion || '',
...(credentials ? { credentials } : {}),
...(this.config.awsRegion ? { region: this.config.awsRegion } : {}),
},
bucketName,
exportPrefix,
Expand All @@ -863,8 +884,11 @@ export class DatabricksDriver extends JDBCDriver {
const { bucketName, path } = this.parseBucketUrl(this.config.exportBucket);
const exportPrefix = path ? `${path}/${tableName}` : tableName;

// Omit credentials when none are configured so the Google SDK falls back
// to Application Default Credentials (honors `GOOGLE_APPLICATION_CREDENTIALS`,
// including workload-identity-federation `external_account` configs).
return this.extractFilesFromGCS(
{ credentials: this.config.gcsCredentials },
{ credentials: this.config.gcsCredentials || undefined },
bucketName,
exportPrefix,
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,23 @@ jest.mock('@azure/storage-blob', () => ({
generateBlobSASQueryParameters: jest.fn().mockReturnValue('test')
}));

// Capture the config the S3 client is constructed with so we can assert that
// empty OIDC credentials are not forwarded (which would yield
// `AuthorizationHeaderMalformed`) — see CUB-3000.
const s3ClientConfigs: any[] = [];
jest.mock('@aws-sdk/client-s3', () => ({
S3: jest.fn().mockImplementation((config: any) => {
s3ClientConfigs.push(config);
return {
listObjectsV2: jest.fn().mockResolvedValue({ Contents: [{ Key: 'product/part-0.csv' }] }),
};
}),
GetObjectCommand: jest.fn().mockImplementation((input: any) => ({ input })),
}));
jest.mock('@aws-sdk/s3-request-presigner', () => ({
getSignedUrl: jest.fn().mockResolvedValue('https://signed.example/product/part-0.csv'),
}));

jest.spyOn(ContainerClient.prototype, 'listBlobsFlat').mockImplementation(
jest.fn().mockReturnValue([{name: 'product.csv/test.csv'}])
);
Expand Down Expand Up @@ -55,11 +72,62 @@ describe('DatabricksDriver', () => {

test('should get signed URLs of unloaded csv files by azure client secret', async () => {
process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_KEY='';
databricksDriver = new DatabricksDriver();
databricksDriver = new DatabricksDriver();
databricksDriver['unloadWithSql'] = mockUnloadWithSql;

const result = await databricksDriver.unload(mockTableName, mockOptions);
expect(mockUnloadWithSql).toHaveBeenCalledWith(mockTableName, mockSql, mockParams);
expect(result.csvFile).toBeTruthy();
});

describe('s3 export bucket', () => {
beforeEach(() => {
s3ClientConfigs.length = 0;
process.env.CUBEJS_DB_EXPORT_BUCKET_TYPE = 's3';
process.env.CUBEJS_DB_EXPORT_BUCKET = 's3://cube-export';
process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_REGION = 'us-east-1';
});

afterAll(() => {
// Restore the azure bucket config the other tests rely on.
process.env.CUBEJS_DB_EXPORT_BUCKET_TYPE = 'azure';
process.env.CUBEJS_DB_EXPORT_BUCKET = 'wasbs://cube-export@mock.blob.core.windows.net';
delete process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_KEY;
delete process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET;
delete process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_REGION;
});

// CUB-3000: OIDC / workload identity — no static keys configured.
test('omits credentials when no static keys are set so the SDK uses the default chain', async () => {
delete process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_KEY;
delete process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET;

databricksDriver = new DatabricksDriver();
databricksDriver['unloadWithSql'] = mockUnloadWithSql;

const result = await databricksDriver.unload(mockTableName, mockOptions);

expect(result.csvFile).toBeTruthy();
expect(s3ClientConfigs).toHaveLength(1);
expect(s3ClientConfigs[0].credentials).toBeUndefined();
expect(s3ClientConfigs[0].region).toBe('us-east-1');
});

test('passes static credentials through when configured', async () => {
process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_KEY = 'AKIAEXAMPLE';
process.env.CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET = 'secretexample';

databricksDriver = new DatabricksDriver();
databricksDriver['unloadWithSql'] = mockUnloadWithSql;

const result = await databricksDriver.unload(mockTableName, mockOptions);

expect(result.csvFile).toBeTruthy();
expect(s3ClientConfigs).toHaveLength(1);
expect(s3ClientConfigs[0].credentials).toEqual({
accessKeyId: 'AKIAEXAMPLE',
secretAccessKey: 'secretexample',
});
});
});
});
Loading