From 5988c4a294eb794be86247f7a3210db1d3950018 Mon Sep 17 00:00:00 2001 From: jan-kubica Date: Tue, 9 Jun 2026 06:02:49 +0200 Subject: [PATCH 1/2] fix: include shipped dependencies in JavaScript SBOMs cdxgen was invoked with --required-only. For bun lockfiles cdxgen derives component scope from source-usage evidence rather than the manifest: a package is marked `required` only when it has a production source import, and everything else (shipped transitive dependencies, and production dependencies imported only from type-declaration or test files) is marked `optional`. --required-only then drops that whole set, so the generated SBOM and third-party notices silently under-reported shipped dependencies. Stop passing --required-only so the SBOM reflects the full installed closure. Development dependencies are now excluded by installing production dependencies only before generation (documented in the README Boundaries section) instead of relying on cdxgen's unreliable bun scope. Add a real-cdxgen integration test (a workspace with a source-imported dependency plus a production dependency imported only from a .d.ts and a test file, installed production-only) asserting the shipped dependency and its transitive runtime dependency appear while a devDependency does not. The CI integration-real job gains a bun toolchain to run it. 
--- .github/workflows/ci.yml | 4 ++ README.md | 8 +++ src/sbom.rs | 8 ++- tests/integration_real.rs | 146 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 165 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 387eec8..fd8a5ae 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,6 +60,10 @@ jobs: with: node-version: 22 + - uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2 + with: + bun-version: latest + - name: Install cdxgen run: npm install -g @cyclonedx/cdxgen@12.1.5 diff --git a/README.md b/README.md index 633d369..979169f 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,14 @@ The workflow is file-based: especially when your dependency graph includes platform-specific packages. - `notice.internal_scopes` can be used to exclude first-party scoped packages from generated notices and repo SBOM component inventories. +- The JavaScript SBOM reflects whatever is present in `node_modules` at + generation time. Install **production dependencies only** before running + (for example `bun install --frozen-lockfile --production`, or + `npm ci --omit=dev`) so the generated SBOM and notices describe the shipped + closure and do not include development-only dependencies. The tool does not + pass cdxgen `--required-only`, because for bun lockfiles that flag drops + shipped transitive dependencies (and production dependencies imported only + from type-declaration or test files) instead of dev dependencies. - `sbom.exclude_regexes` can be used to exclude generated runtime artifacts such as `wasm/dist/` outputs or root-level `*.wasi-browser.js` files from SBOM evidence so post-build checks stay deterministic. 
diff --git a/src/sbom.rs b/src/sbom.rs index 8732486..01b7275 100644 --- a/src/sbom.rs +++ b/src/sbom.rs @@ -87,7 +87,13 @@ pub fn generate_project_sbom( .arg("--no-install-deps") .arg("--exclude-regex") .arg(build_exclude_regex(sbom_config)) - .arg("--required-only") + // Deliberately omit cdxgen's --required-only. For bun lockfiles cdxgen + // derives component scope from source-usage evidence rather than the + // manifest, so it marks shipped transitive dependencies (and production + // dependencies imported only from type-declaration or test files) as + // optional and drops them, producing an incomplete SBOM. Completeness of + // the shipped closure is gated on the install instead: callers must + // install production dependencies only (see README "Boundaries"). .arg("--json-pretty") .arg("-o") .arg(output_path) diff --git a/tests/integration_real.rs b/tests/integration_real.rs index 699afad..5ddaae7 100644 --- a/tests/integration_real.rs +++ b/tests/integration_real.rs @@ -70,10 +70,80 @@ cfg-if = "1.0" assert!(status.success()); } +fn write_js_fixture(root: &Path) { + // A workspace member with two production dependencies: + // - `picocolors` is imported from production source, so cdxgen records a + // usage occurrence and marks it `required`. Its presence is what pushes + // cdxgen into usage-based scoping for the workspace. + // - `debug` (which pulls the transitive runtime dependency `ms`) is a + // production dependency referenced only from a type-declaration file and + // a test file. With a `required` sibling present, cdxgen marks `debug` + // and `ms` `optional`, so the historic `--required-only` invocation + // dropped them from the SBOM even though they ship. + // `left-pad` is a development-only dependency and must never be reported. 
+ write_file( + &root.join("package.json"), + r#"{ "name": "fixture-root", "private": true, "workspaces": ["packages/*"] } +"#, + ); + write_file( + &root.join("packages/app/package.json"), + r#"{ + "name": "@fixture/app", + "version": "0.0.0", + "dependencies": { + "picocolors": "1.0.0", + "debug": "4.3.4" + }, + "devDependencies": { + "left-pad": "1.3.0" + } +} +"#, + ); + write_file( + &root.join("bunfig.toml"), + "[install]\nlinker = \"hoisted\"\n", + ); + write_file( + &root.join("packages/app/src/index.ts"), + "import pc from \"picocolors\";\nexport const c = pc;\n", + ); + write_file( + &root.join("packages/app/src/types.d.ts"), + "import type { Debugger } from \"debug\";\nexport type T = Debugger;\n", + ); + write_file( + &root.join("packages/app/src/app.test.ts"), + "import createDebug from \"debug\";\nexport const d = createDebug;\n", + ); + + // The tool runs cdxgen with --no-install-deps, so the production closure must + // already be materialized in node_modules. Installing production-only keeps + // development dependencies out of the generated SBOM. 
+ let status = StdCommand::new("bun") + .arg("install") + .arg("--production") + .arg("--ignore-scripts") + .current_dir(root) + .status() + .expect("bun is required for the real JavaScript cdxgen test"); + assert!(status.success()); +} + fn parse_json(path: &Path) -> Value { serde_json::from_str(&fs::read_to_string(path).unwrap()).unwrap() } +fn component_names(sbom: &Value) -> Vec<String> { + sbom["components"] + .as_array() + .unwrap() + .iter() + .filter_map(|component| component["name"].as_str().map(ToString::to_string)) + .collect() +} + #[test] #[ignore = "requires cdxgen or bunx and network access for a real cdxgen invocation"] fn generate_with_real_cdxgen_for_rust_project() { @@ -174,3 +244,79 @@ fn generate_with_real_cdxgen_and_syft() { .assert() .success(); } + +#[test] +#[ignore = "requires bun, cdxgen or bunx, and network access for a real cdxgen invocation"] +fn generate_includes_shipped_js_dependencies_without_production_source_imports() { + let temp = TempDir::new().unwrap(); + let helpers = TempDir::new().unwrap(); + write_js_fixture(temp.path()); + write_file( + &temp.path().join(".provenance.yml"), + r"version: 1 +output_dir: provenance +projects: + - id: root + path: . + ecosystems: + - javascript +", + ); + + let cdxgen = resolve_real_cdxgen(helpers.path()); + + cargo_bin() + .current_dir(temp.path()) + .env("PROVENANCE_CDXGEN", &cdxgen) + .arg("generate") + .assert() + .success(); + + let sbom = parse_json(&temp.path().join("provenance/sbom.cdx.json")); + let names = component_names(&sbom); + + // Baseline: a production dependency imported from source is always present. + assert!( + names.iter().any(|name| name == "picocolors"), + "expected source-imported dependency 'picocolors' in SBOM, got: {names:?}" + ); + + // `debug` is a production dependency imported only from a `.d.ts` and a + // `.test.ts`; `ms` is its transitive runtime dependency, never imported + // directly. 
Both ship and must appear (regression guard for `--required-only`, + // which dropped them). + assert!( + names.iter().any(|name| name == "debug"), + "expected shipped production dependency 'debug' in SBOM, got: {names:?}" + ); + assert!( + names.iter().any(|name| name == "ms"), + "expected transitive runtime dependency 'ms' in SBOM, got: {names:?}" + ); + + // `left-pad` is a devDependency and is not installed under --production, so it + // must not be reported as a shipped component. + assert!( + !names.iter().any(|name| name == "left-pad"), + "development-only dependency 'left-pad' must not appear in SBOM, got: {names:?}" + ); + + let notice = + fs::read_to_string(temp.path().join("provenance/THIRD-PARTY-NOTICES.txt")).unwrap(); + assert!( + notice.contains("- debug "), + "notice should attribute 'debug'" + ); + assert!(notice.contains("- ms "), "notice should attribute 'ms'"); + assert!( + !notice.contains("- left-pad "), + "notice should not attribute dev-only 'left-pad'" + ); + + cargo_bin() + .current_dir(temp.path()) + .env("PROVENANCE_CDXGEN", &cdxgen) + .arg("check") + .assert() + .success(); +} From 320c346299f4134f4fa954b40b507867c1a5e391 Mon Sep 17 00:00:00 2001 From: jan-kubica Date: Tue, 9 Jun 2026 06:19:24 +0200 Subject: [PATCH 2/2] fix: scope --required-only omission to JavaScript projects Address review feedback: only omit cdxgen's --required-only for projects that include the JavaScript ecosystem, where the flag drops shipped bun dependencies. Non-JavaScript projects (e.g. Rust crates) keep the flag so their behavior is unchanged. (Verified that under --no-install-deps cdxgen assigns no scope to Rust components, so --required-only is currently a no-op there; keeping it scoped avoids changing the Rust invocation and is future-proof if cdxgen starts scoping cargo dev-dependencies.) 
--- README.md | 10 ++++++---- src/sbom.rs | 22 ++++++++++++++-------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 979169f..d5df1f0 100644 --- a/README.md +++ b/README.md @@ -37,10 +37,12 @@ The workflow is file-based: generation time. Install **production dependencies only** before running (for example `bun install --frozen-lockfile --production`, or `npm ci --omit=dev`) so the generated SBOM and notices describe the shipped - closure and do not include development-only dependencies. The tool does not - pass cdxgen `--required-only`, because for bun lockfiles that flag drops - shipped transitive dependencies (and production dependencies imported only - from type-declaration or test files) instead of dev dependencies. + closure and do not include development-only dependencies. For JavaScript + projects the tool does not pass cdxgen `--required-only`, because for bun + lockfiles that flag drops shipped transitive dependencies (and production + dependencies imported only from type-declaration or test files) instead of + dev dependencies. Non-JavaScript projects (for example Rust crates) keep + `--required-only`. - `sbom.exclude_regexes` can be used to exclude generated runtime artifacts such as `wasm/dist/` outputs or root-level `*.wasi-browser.js` files from SBOM evidence so post-build checks stay deterministic. diff --git a/src/sbom.rs b/src/sbom.rs index 01b7275..1dc622f 100644 --- a/src/sbom.rs +++ b/src/sbom.rs @@ -86,14 +86,20 @@ pub fn generate_project_sbom( command .arg("--no-install-deps") .arg("--exclude-regex") - .arg(build_exclude_regex(sbom_config)) - // Deliberately omit cdxgen's --required-only. For bun lockfiles cdxgen - // derives component scope from source-usage evidence rather than the - // manifest, so it marks shipped transitive dependencies (and production - // dependencies imported only from type-declaration or test files) as - // optional and drops them, producing an incomplete SBOM. 
Completeness of - // the shipped closure is gated on the install instead: callers must - // install production dependencies only (see README "Boundaries"). + .arg(build_exclude_regex(sbom_config)); + // cdxgen's --required-only keeps only `required`-scope components. For bun + // lockfiles cdxgen derives scope from source-usage evidence rather than the + // manifest, so it marks shipped transitive dependencies (and production + // dependencies imported only from type-declaration or test files) as + // optional and drops them, producing an incomplete SBOM. Omit the flag for + // any project that includes the JavaScript ecosystem; development + // dependencies are excluded by installing production dependencies only (see + // README "Boundaries"). Non-JavaScript projects keep the flag so their + // existing behavior is unchanged. + if !project.ecosystems.contains(&Ecosystem::Javascript) { + command.arg("--required-only"); + } + command .arg("--json-pretty") .arg("-o") .arg(output_path)