diff --git a/MUTATION_BASELINE.md b/MUTATION_BASELINE.md new file mode 100644 index 0000000..3d9149a --- /dev/null +++ b/MUTATION_BASELINE.md @@ -0,0 +1,67 @@ +# Mutation Testing Baseline + +This document inventories the mutants that the test suite does not kill, with a one-line justification for each category. CI enforces an 80% efficacy floor (see `.github/workflows/ci.yml`); new mutants outside the categories below should fail review. + +> **This project has not been independently audited.** The work described here is internal hardening done by the maintainer with audit-equivalent methodology — fuzzing, mutation testing, named threat-class tests, RFC-clause-traceable tests. Treat this document as a *self-assessment*, not an external attestation. + +## Methodology + +- Tool: [`gremlins`](https://github.com/go-gremlins/gremlins) +- Command: `make mutation-test` (runs from `pkg/cms/` with `--timeout-coefficient 30`) +- Gate: `--threshold-efficacy 80` in the CI `mutation` job + +## Current numbers (last measured 2026-06-17) + +| Package | Killed | Lived | Not covered | Efficacy | Coverage | +|---|---|---|---|---|---| +| `pkg/cms` | 221 | 44 | 23 | **83.40%** | 92.01% | +| `pkg/cms/internal` | 18 | 0 | 2 | **100.00%** | 90.00% | + +Run `make mutation-test` to reproduce; full output goes to stderr. + +## Why we don't chase 100% on `pkg/cms` + +Half the remaining mutants are unkillable without weakening the library. The lived/not-covered set decomposes into four categories. + +### Category A — Logically equivalent (cannot be killed; ~15 mutants) + +The mutation produces code that's semantically identical to the original. + +| Example | Why it's equivalent | +|---|---| +| `signer.go:212` — `if opts.DigestAlgorithm != 0 && opts.DigestAlgorithm != SHA512` | RFC 8419 requires SHA-512 here, so the signer overrides the digest and uses SHA-512 regardless of this check. Mutating either `!=` to `==` doesn't change observable behavior. 
| +| `parseASN1Length` overflow detection `length < 0` | Belt-and-suspenders against integer overflow. The earlier `numBytes > 4` check already prevents triggering it on 64-bit platforms. | + +These mutants would only be "killable" by deleting the defense-in-depth code. We keep the code. + +### Category B — Defensive bounds (effectively unreachable; ~12 mutants) + +The mutation flips a comparison whose true value requires an input the test suite refuses to construct: ≥2 GB ASN.1 inputs, malformed lengths past `parseASN1Length`'s upstream sanity checks. + +Triggering these mutants in tests would mean instantiating multi-gigabyte allocations on every test run. The CI cost (slow tests, brittle infrastructure) outweighs the bug-catching value. + +### Category C — Error-message format strings (~5 mutants) + +Mutants in `errors.go` that change the wording of an error string. Production callers should not depend on error string content; behavioral assertions in the test suite check for `errors.Is`/`errors.As` matches, not substrings. + +### Category D — Genuine boundary cases (~10 mutants, addressable) + +Specific input sizes our tests don't naturally hit (e.g. a SignerInfo whose total length lands at exactly 65,536 bytes). This is the *only* category where additional tests would meaningfully improve mutation efficacy. + +We don't pursue them because the bug-finding return is low — the existing length-boundary tests (`length_boundary_test.go`) already cover the load-bearing branches in `makeSequenceHeader`/`makeSetHeader`, and the remaining mutants live in fully-tested code that the mutation tool's boundary-flipping heuristic struggles to distinguish from the original. + +## Promotion criteria + +If a *new* lived mutant appears in CI that isn't already accounted for in the inventory above: + +1. Read the mutant's location. Identify what behavior it perturbs. +2. If it represents a real bug surface → add a test that kills it. +3. 
If it falls under one of the categories above → update this file with the new instance. +4. Never bump the CI threshold down to accommodate a regression; raise tests up instead. + +## What this baseline does not capture + +- **Cryptographic primitive correctness** (Ed25519, SHA-256/384/512): covered by Go stdlib's own test suite plus our `TestRFC8032TestVectors` regression check, not by mutation testing. +- **Side-channel resistance**: not tested by mutation. Constant-time properties are checked by code review and `subtle.ConstantTimeCompare`/`crypto/subtle` usage audits. +- **OpenSSL interop**: covered by `make docker-test`, not by mutation. +- **Supply chain**: covered by `gosec`, `govulncheck`, pinned action SHAs, and `go.mod` minimum version (1.25.5). diff --git a/README.md b/README.md index 1ae7d50..32fc11e 100644 --- a/README.md +++ b/README.md @@ -149,9 +149,55 @@ The library enforces the following size limits for security: - **Maximum CMS signature size**: 1MB (prevents memory exhaustion from malformed signatures) - **Maximum certificate size**: 64KB (standard X.509 certificates are typically 1-4KB) -- **Supported digest algorithm**: SHA-256 only (MD5 and SHA-1 are rejected) +- **Supported digest algorithms**: SHA-256, SHA-384, SHA-512 (MD5 and SHA-1 are rejected; RFC 8419 mandates SHA-512 for Ed25519 with signedAttributes) - **Supported signature algorithm**: Ed25519 only +## Testing & Hardening + +> **This library has not been independently audited.** What's described here is internal, audit-equivalent testing work done by the maintainer — not an external attestation. 
+ +The repository ships with a layered test methodology designed to make the work defensible without the price tag of a third-party review: + +| Layer | What it does | Where | +|---|---|---| +| **Unit + roundtrip tests** | Sign/verify against RFC 8032 vectors, OpenSSL interop, expected-shape parsing | `pkg/cms/*_test.go` | +| **Behavioral fuzzers** | Random data + ephemeral key → sign → verify; assert roundtrip and tamper-detection invariants | `pkg/cms/behavioral_fuzz_test.go`, `tier2_fuzz_test.go`, `tier3_fuzz_test.go` | +| **RFC-traceable tests** | Named per RFC clause so you can `grep TestRFC5652_5_3` and see what's covered | `pkg/cms/rfc_compliance_test.go` | +| **Named threat-class tests** | One test per documented CMS attack (replay, key confusion, trust bypass, etc.) | `pkg/cms/attack_scenarios_test.go` | +| **DER strictness probes** | Reject non-canonical length encodings (BER) on the verifier side | `pkg/cms/der_strictness_test.go` | +| **Byte-by-byte tamper enumeration** | Verifies every byte of a Case 1 / Case 2 CMS is load-bearing | `pkg/cms/tamper_enum_test.go` | +| **Concurrent sign/verify under `-race`** | 32 goroutines × 200 verifications | `pkg/cms/concurrency_test.go` | +| **Length-encoding boundary tests** | Byte-exact DER output at every length-form boundary (X.690 §8.1.3.4) | `pkg/cms/length_boundary_test.go` | +| **CMS construction harness** | Test-only builder for reaching verifier paths the production signer can't emit (e.g. 
SKI form) | `pkg/cms/cms_builder_test.go` | +| **Mutation testing with CI gate** | [`gremlins`](https://github.com/go-gremlins/gremlins) at ≥80% efficacy floor — see [MUTATION_BASELINE.md](MUTATION_BASELINE.md) | `.github/workflows/ci.yml` | +| **Static analysis** | `gosec` (no exclusions), `govulncheck`, `golangci-lint` | `.github/workflows/ci.yml` | +| **Toolchain CVE tracking** | `go.mod` pinned to a stdlib patch that clears all reachable CVEs surfaced by `govulncheck` | `go.mod` | + +Run the full audit-level suite locally: + +```bash +make test # full suite under -race with coverage +make long-fuzz # every fuzzer for 10m each (override FUZZTIME=) +make overnight-fuzz # FUZZTIME=1h per fuzzer (~16h total) +make mutation-test # gremlins mutation analysis +make govulncheck # stdlib + dependency CVE check +make docker-test # OpenSSL interop in a clean container +``` + +**Bugs found and fixed by this methodology** (all in this repo's git history): + +- `SignedData.Version` accepted any `int` instead of the RFC 5652 §5.1 whitelist of `{1, 3, 4, 5}` ([#13](https://github.com/agentic-research/go-cms/pull/13)) +- `EncapContentInfo.eContentType` was unchecked when `signedAttributes` were absent — RFC 5652 §11.1 requires `id-data` ([#13](https://github.com/agentic-research/go-cms/pull/13)) +- `parseASN1Length` accepted non-canonical DER length encodings, a malleability surface ([#13](https://github.com/agentic-research/go-cms/pull/13)) +- `matchesSID` expected EXPLICIT `[0]` wrapping of SubjectKeyIdentifier instead of the RFC-canonical IMPLICIT form — meaning the verifier could not validate SKI-form CMS produced by OpenSSL or `github.com/github/ietf-cms` ([#14](https://github.com/agentic-research/go-cms/pull/14)) + +What this methodology does *not* cover: + +- **Cryptographic primitive correctness** (Ed25519, SHA-2 family) — covered by Go stdlib's own test suite. +- **Side-channel resistance** beyond `crypto/subtle.ConstantTimeCompare` usage. 
+- **Hardware Security Module integration** — out of scope. +- **Formal protocol-level review of CMS embedding inside higher-level protocols** (S/MIME, CAdES, etc.) — out of scope. + ## Security This is a personal, open-source project. While developed with security best practices in mind, it comes with no guarantees. diff --git a/pkg/cms/internal/asn1util_test.go b/pkg/cms/internal/asn1util_test.go new file mode 100644 index 0000000..8085dda --- /dev/null +++ b/pkg/cms/internal/asn1util_test.go @@ -0,0 +1,165 @@ +package internal + +import ( + "bytes" + "encoding/asn1" + "testing" +) + +// These tests close the largest remaining mutation-testing coverage gap +// on the package: gremlins runs against pkg/cms/ and could not see any +// tests that linked the pkg/cms/internal/ package, so every mutant in +// the three Marshal*Header helpers and MarshalSafe registered as NOT +// COVERED. The helpers are functionally parallel to makeSequenceHeader / +// makeSetHeader in pkg/cms/signer.go (themselves tested in +// pkg/cms/length_boundary_test.go), so the same boundary-table approach +// applies here. +// +// Test cases probe both sides of every DER length-form boundary +// (X.690 §8.1.3.4–§8.1.3.5) so a `<` ↔ `<=` mutation produces a detectable +// difference. + +var lengthCases = []struct { + length int // content length handed to the Marshal*Header helper under test + encLen []byte // expected length-encoding bytes that follow the tag (tag itself excluded) +}{ + {0, []byte{0x00}}, + {1, []byte{0x01}}, + {126, []byte{0x7e}}, + {127, []byte{0x7f}}, + {128, []byte{0x81, 0x80}}, + {129, []byte{0x81, 0x81}}, + {254, []byte{0x81, 0xfe}}, + {255, []byte{0x81, 0xff}}, + {256, []byte{0x82, 0x01, 0x00}}, + {257, []byte{0x82, 0x01, 0x01}}, + {65534, []byte{0x82, 0xff, 0xfe}}, + {65535, []byte{0x82, 0xff, 0xff}}, + {65536, []byte{0x83, 0x01, 0x00, 0x00}}, + {65537, []byte{0x83, 0x01, 0x00, 0x01}}, +} + +// TestMarshalSequenceHeader asserts byte-exact DER SEQUENCE headers at +// and around every length-form boundary. 
+func TestMarshalSequenceHeader(t *testing.T) { + for _, tc := range lengthCases { + t.Run(lengthName(tc.length), func(t *testing.T) { + got := MarshalSequenceHeader(tc.length) + want := append([]byte{0x30}, tc.encLen...) + if !bytes.Equal(got, want) { + t.Errorf("MarshalSequenceHeader(%d) = % x, want % x", tc.length, got, want) + } + }) + } +} + +// TestMarshalSetHeader asserts byte-exact DER SET headers at and around +// every length-form boundary. +func TestMarshalSetHeader(t *testing.T) { + for _, tc := range lengthCases { + t.Run(lengthName(tc.length), func(t *testing.T) { + got := MarshalSetHeader(tc.length) + want := append([]byte{0x31}, tc.encLen...) + if !bytes.Equal(got, want) { + t.Errorf("MarshalSetHeader(%d) = % x, want % x", tc.length, got, want) + } + }) + } +} + +// TestMarshalImplicitHeader asserts byte-exact DER [0] IMPLICIT headers +// at and around every length-form boundary. Used by the signer's +// signedAttributes wrapping (0xA0 tag for IMPLICIT context-specific 0). +func TestMarshalImplicitHeader(t *testing.T) { + for _, tc := range lengthCases { + t.Run(lengthName(tc.length), func(t *testing.T) { + got := MarshalImplicitHeader(tc.length) + want := append([]byte{0xA0}, tc.encLen...) + if !bytes.Equal(got, want) { + t.Errorf("MarshalImplicitHeader(%d) = % x, want % x", tc.length, got, want) + } + }) + } +} + +// TestMarshalSafe asserts MarshalSafe is a faithful pass-through to +// asn1.Marshal — same outputs, same errors. The helper exists as a +// single seam for future hardening (e.g. depth limits) and we test the +// current behavior so any future divergence is intentional and visible. 
+func TestMarshalSafe(t *testing.T) { + cases := []struct { + name string + value any + }{ + {"int", 42}, + {"bool", true}, + {"byteslice", []byte{0x01, 0x02, 0x03}}, + {"oid", asn1.ObjectIdentifier{1, 3, 101, 112}}, + {"string", "hello"}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + gotSafe, errSafe := MarshalSafe(c.value) + gotDirect, errDirect := asn1.Marshal(c.value) + if (errSafe == nil) != (errDirect == nil) { + t.Fatalf("MarshalSafe err=%v vs asn1.Marshal err=%v", errSafe, errDirect) + } + if !bytes.Equal(gotSafe, gotDirect) { + t.Errorf("MarshalSafe(% q) = % x; asn1.Marshal = % x", c.name, gotSafe, gotDirect) + } + }) + } +} + +// TestConstants pins the package's documented size limits. Changing +// these values is a security-relevant decision (it shifts the DoS +// resistance envelope), so the test exists to force the change through +// code review. +func TestConstants(t *testing.T) { + if MaxSignatureSize != 1024*1024 { + t.Errorf("MaxSignatureSize = %d, want %d (1MB)", MaxSignatureSize, 1024*1024) + } + if MaxCertSize != 64*1024 { + t.Errorf("MaxCertSize = %d, want %d (64KB)", MaxCertSize, 64*1024) + } + if SigTypeCMS != "cms" { + t.Errorf("SigTypeCMS = %q, want %q", SigTypeCMS, "cms") + } +} + +// lengthName turns a length into a stable subtest name covering every +// boundary case. +func lengthName(length int) string { + switch length { + case 0: + return "len_0_empty" + case 1: + return "len_1" + case 126: + return "len_126_below_short_max" + case 127: + return "len_127_short_max" + case 128: + return "len_128_long1_min" + case 129: + return "len_129" + case 254: + return "len_254_below_long1_max" + case 255: + return "len_255_long1_max" + case 256: + return "len_256_long2_min" + case 257: + return "len_257" + case 65534: + return "len_65534_below_long2_max" + case 65535: + return "len_65535_long2_max" + case 65536: + return "len_65536_long3_min" + case 65537: + return "len_65537" + default: + return "len_arbitrary" + } +}