diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 0000000..8ce5375
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,29 @@
+# Runs golangci-lint with the misspell linter enabled on pushes and pull requests targeting main.
+name: Lint
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  lint:
+    name: Defaults & Misspelling
+    runs-on: ubuntu-latest
+
+    steps:
+
+      # Check out first: setup-go looks for go.sum in the workspace to key its module cache.
+      - name: Check out code
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v6
+        with:
+          go-version: "1.25"
+
+      - name: Lint
+        uses: golangci/golangci-lint-action@v9
+        with:
+          version: v2.12
+          args: --enable misspell
diff --git a/.github/workflows/module.yml b/.github/workflows/module.yml
new file mode 100644
index 0000000..0b92456
--- /dev/null
+++ b/.github/workflows/module.yml
@@ -0,0 +1,34 @@
+# Runs the unit tests (make test) against every Go version listed in the matrix.
+name: Go Module
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  test:
+    name: Test
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        go-version: ["1.25", "1.26"]
+
+    steps:
+      # Check out first: setup-go looks for go.sum in the workspace to key its module cache.
+      - name: Check out code
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v6
+        with:
+          go-version: ${{ matrix.go-version }}
+
+      - name: Install Dependencies
+        run: go mod download
+        env:
+          GOPROXY: https://proxy.golang.org,direct
+
+      - name: Run Unit Tests
+        run: make test
diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml
new file mode 100644
index 0000000..2df4028
--- /dev/null
+++ b/.github/workflows/security.yml
@@ -0,0 +1,28 @@
+# Runs the gosec scanner over the module, excluding the privacytest directory.
+name: Go Security Checker
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  gosec:
+    name: Inspect for security problems
+    runs-on: ubuntu-latest
+
+    steps:
+      # Check out first: setup-go looks for go.sum in the workspace to key its module cache.
+      - name: Check out code
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v6
+        with:
+          go-version: "1.25"
+
+      - name: Run Gosec scanner
+        # NOTE(review): @master is a moving ref; consider pinning to a release tag or commit SHA.
+        uses: securego/gosec@master
+        with:
+          args: -exclude-dir=privacytest ./...
diff --git a/Makefile b/Makefile index ff961d0..9a7abe0 100644 --- a/Makefile +++ b/Makefile @@ -1,19 +1,13 @@ -# DOCKER_NETWORK = lambda-local +.PHONY: lint security test test/cov_html test/cov_total bench bench/profile doc -# DYNAMODB_PORT = 8070 -# DYNAMODB_VOLUME = dynamodb-local-v2.0 - -# KMS_PORT = 8090 - -# export DYNAMODB_ENDPOINT = http://localhost:$(DYNAMODB_PORT) -# export KMS_ENDPOINT = http://localhost:$(KMS_PORT) - -.PHONY: lint lint: golangci-lint run --enable misspell +security: + gosec -exclude-dir=privacytest ./... + test: - packages=`go list ./... | grep -v privacytest`; \ + packages=$$(go list ./... | grep -v privacytest); \ go test -race -cover $$packages -coverprofile coverage.out -covermode atomic test/cov_html: @@ -29,4 +23,4 @@ bench/profile: go tool pprof -alloc_objects mem.prof doc: - godoc -http=:6060 \ No newline at end of file + godoc -http=:6060 diff --git a/README.md b/README.md new file mode 100644 index 0000000..2a1c880 --- /dev/null +++ b/README.md @@ -0,0 +1,159 @@ +privacy-engine +============ +[![Go Module](https://github.com/ln80/privacy-engine/actions/workflows/module.yml/badge.svg)](https://github.com/ln80/privacy-engine/actions/workflows/module.yml) +[![GoDoc](https://godoc.org/github.com/ln80/privacy-engine?status.svg)](https://godoc.org/github.com/ln80/privacy-engine) + +A Go library for field-level encryption, crypto-shredding, and tokenization of sensitive data in structs. Built on top of [struct-sensitive](https://github.com/ln80/struct-sensitive), it uses struct tags to identify PII fields and applies AES-256-GCM encryption per data subject. + +Designed for immutable stores (event logs, audit trails) where you can't delete records but need to comply with data erasure requirements (GDPR Article 17) via cryptographic erasure. 
+ +## Installation + +```bash +go get github.com/ln80/privacy-engine +``` + +## Features + +- **Field-level encryption** using AES-256-GCM with per-subject data encryption keys (DEK) +- **Crypto-shredding** with graceful mode: disable keys first, recover within a grace period, then hard-delete +- **Streaming encryption** for large payloads with chunk-based authenticated encryption +- **Tokenization** to replace sensitive identifiers with opaque surrogate tokens +- **Key derivation** via HKDF-SHA256 for purpose-scoped keys from a subject's DEK +- **Multi-tenancy** with namespace isolation and a Factory for managing Protector instances +- **Pluggable backends** via `KeyEngine`, `Encryptor`, and `TokenEngine` interfaces + +## Quick Start + +```go +import ( + "context" + + "github.com/ln80/privacy-engine" + "github.com/ln80/privacy-engine/memory" +) + +type User struct { + ID string `pii:"subjectID"` + Email string `pii:"data,replace=redacted"` + Country string +} + +func main() { + ctx := context.Background() + protector := privacy.NewProtector("my-namespace", memory.NewKeyEngine()) + + user := User{ID: "user-123", Email: "alice@example.com", Country: "BE"} + + // Encrypt PII fields in-place + _ = protector.Encrypt(ctx, &user) + // user.Email is now: "ENC..dXNlci0xMjM=.Base64CipherText..." 
+ // user.Country is unchanged + + // Decrypt back + _ = protector.Decrypt(ctx, &user) + // user.Email is "alice@example.com" again + + // Crypto-shred: forget the subject's key + _ = protector.Encrypt(ctx, &user) + _ = protector.Forget(ctx, "user-123") + _ = protector.Decrypt(ctx, &user) + // user.Email is now "redacted" (from the replace tag option) +} +``` + +## Struct Tags + +Fields are tagged using `pii`, `sensitive`, or `sens` (interchangeable): + +| Tag | Purpose | Example | +|-----|---------|---------| +| `subjectID` | Identifies the data subject (one per struct) | `pii:"subjectID"` | +| `data` | Marks a field as sensitive | `pii:"data"` | +| `data,replace=X` | Replacement value when the subject is forgotten | `pii:"data,replace=deleted"` | +| `dive` | Recurse into nested structs | `pii:"dive"` | + +## Architecture + +``` +┌──────────────┐ +│ Protector │ ← Main API: Encrypt, Decrypt, Forget, Recover, Tokenize +└──────┬───────┘ + │ + ┌────┴─────┐ ┌────────────┐ ┌──────────────┐ + │KeyEngine │ │ Encryptor │ │ TokenEngine │ + │(keys CRUD)│ │(AES-256-GCM)│ │(value↔token) │ + └──────────┘ └────────────┘ └──────────────┘ +``` + +- **KeyEngine** manages encryption key lifecycle (create, get, disable, re-enable, delete). Implementations: in-memory (for tests), DynamoDB + KMS (production, see [privacy-engine.elastic](https://github.com/ln80/privacy-engine.elastic)). +- **Encryptor** handles the actual encryption. Default: AES-256-GCM with random nonces and namespace-bound AAD. +- **TokenEngine** manages value-to-token mappings for pseudonymization. Optional. 
+ +## Tokenization + +Replace sensitive identifiers with opaque tokens early in the pipeline: + +```go +tokens, _ := protector.Tokenize(ctx, privacy.TokenDataSlice("alice@example.com"), privacy.WithPrefix("sub_")) +surrogateID := tokens.Get("alice@example.com").Token +// Use surrogateID downstream instead of the real email +``` + +## Streaming Encryption + +For large payloads (files, attachments): + +```go +encReader, _ := protector.EncryptStream(ctx, "user-123", plaintextReader) +// encReader emits authenticated ciphertext in 4MB chunks + +decReader, _ := protector.DecryptStream(ctx, "user-123", encReader) +// decReader emits the original plaintext +``` + +## Factory & Monitoring + +For multi-tenant applications, use the Factory to manage one Protector per namespace: + +```go +factory := privacy.NewFactory(func(namespace string) privacy.Protector { + return privacy.NewProtector(namespace, keyEngine, opts...) +}) + +// Periodically clears key caches and evicts idle protectors +factory.Monitor(ctx) + +protector, clearFn := factory.Instance("tenant-abc") +defer clearFn() +``` + +## Configuration + +`NewProtector` accepts functional options: + +| Option | Default | Description | +|--------|---------|-------------| +| `CacheEnabled` | `true` | Wrap engines with in-memory TTL cache | +| `CacheTTL` | `20s` | Cache time-to-live | +| `GracefulMode` | `true` | Disable keys before deleting (allows recovery) | +| `Encryptor` | AES-256-GCM | Encryption algorithm | +| `TokenEngine` | `nil` | Token engine for pseudonymization | + +## Production Backend + +See [privacy-engine.elastic](https://github.com/ln80/privacy-engine.elastic) for a serverless implementation using AWS DynamoDB (key/token storage) and KMS (master key management), deployable via SAM. + +## Wire Format + +Encrypted field values are stored as: + +``` +ENC... +``` + +The `ENC.` prefix is used for idempotency detection (fields already encrypted are not re-encrypted). 
Legacy `> (8 * i)) + nonce[len(nonce)-1-i] ^= byte(counter >> (8 * i)) // #nosec G115 -- intentional byte extraction } return nonce diff --git a/example_test.go b/example_test.go index 79bebf0..251a930 100644 --- a/example_test.go +++ b/example_test.go @@ -28,7 +28,7 @@ func Example() { newProtector := func(namespace string) privacy.Protector { return privacy.NewProtector(namespace, memory.NewKeyEngine(), func(pc *privacy.ProtectorConfig) { // Token engine is optional. - // if not provided, the protector service will panic when trying to Tokenize/Detokenize sensitive data + // If not provided, tokenization methods return core.ErrTokenEngineNotConfigured. pc.TokenEngine = memory.NewTokenEngine() // If cache is enabled then the service will decorates engines @@ -88,8 +88,8 @@ func Example() { // Encrypted Output ex: // Profile{ - // Email: " 1<<16-1 { + return nil, errors.New("subject id too long") + } keys, err := p.KeyEngine.GetKeys(ctx, p.namespace, []string{subID}) if err != nil { @@ -390,7 +393,7 @@ func (p *protector) DeriveSubjectKey(ctx context.Context, subID, purpose string) } info := make([]byte, 2+len(subID)+len(purpose)) - binary.BigEndian.PutUint16(info, uint16(len(subID))) + binary.BigEndian.PutUint16(info, uint16(len(subID))) // #nosec G115 -- length validated above copy(info[2:], subID) copy(info[2+len(subID):], purpose) r := hkdf.New(sha256.New, parentKey, []byte("privacy-engine-v1"), info)