From 04c0062f08c0571702f5d2e20e30111757fb2167 Mon Sep 17 00:00:00 2001 From: Parth576 Date: Sat, 28 Feb 2026 22:13:27 -0500 Subject: [PATCH 1/4] feat(infra): add Dockerfile, Docker Compose, and .dockerignore Multi-stage Dockerfile compiles Go backend into a static binary (CGO_ENABLED=0) with distroless runtime. Docker Compose configures backend and Qdrant services with health checks, named volumes for data persistence, and environment variable loading from .env file. Assisted by the code-assist SOP --- .../context.md | 44 +++++++++++++++++++ .../plan.md | 31 +++++++++++++ .../progress.md | 33 ++++++++++++++ .dockerignore | 8 ++++ backend/Dockerfile | 23 ++++++++++ docker-compose.yml | 31 +++++++++++++ 6 files changed, 170 insertions(+) create mode 100644 .agents/scratchpad/2026-02-15-smolterms/task-01-dockerfile-compose-and-env/context.md create mode 100644 .agents/scratchpad/2026-02-15-smolterms/task-01-dockerfile-compose-and-env/plan.md create mode 100644 .agents/scratchpad/2026-02-15-smolterms/task-01-dockerfile-compose-and-env/progress.md create mode 100644 .dockerignore create mode 100644 backend/Dockerfile create mode 100644 docker-compose.yml diff --git a/.agents/scratchpad/2026-02-15-smolterms/task-01-dockerfile-compose-and-env/context.md b/.agents/scratchpad/2026-02-15-smolterms/task-01-dockerfile-compose-and-env/context.md new file mode 100644 index 0000000..81df1f3 --- /dev/null +++ b/.agents/scratchpad/2026-02-15-smolterms/task-01-dockerfile-compose-and-env/context.md @@ -0,0 +1,44 @@ +# Context: Dockerfile, Docker Compose, and Environment Setup + +## Requirements + +Create infrastructure files for local development: +1. `backend/Dockerfile` - Multi-stage build for Go backend +2. `docker-compose.yml` - Backend + Qdrant services +3. `.env.example` - Documented environment variables +4. `backend/.dockerignore` - Exclude unnecessary build context files + +## Project Structure + +- **Go module:** `github.com/parth/smolterms` (go 1.25.7) +- **Entrypoint:** `backend/cmd/server/main.go` +- **Config:** `backend/internal/config/config.go` loads env vars + +## Environment Variables (from config.go) + +| Variable | Required | Default | Description | +|----------|----------|---------|-------------| +| PORT | No | 8080 | HTTP server port | +| LOG_LEVEL | No | info | Logging level | +| ANTHROPIC_API_KEY | Yes | - | Anthropic API key | +| OPENAI_API_KEY | Yes | - | OpenAI API key | +| QDRANT_URL | No | localhost:6334 | Qdrant gRPC endpoint | +| CACHE_DEFAULT_TTL | No | 720h | Cache TTL | + +## Docker Compose Reference (from detailed-design.md Section 10) + +The detailed design specifies: +- Backend builds from `./backend`, port 8080, env_file `.env`, depends on qdrant healthy +- Qdrant: `qdrant/qdrant:latest`, ports 6333 (REST) + 6334 (gRPC), named volume `qdrant_data` +- Qdrant healthcheck: `curl -f http://localhost:6333/healthz` + +## Key Dependencies (from go.mod) + +- goquery, anthropic-sdk-go, openai-go, go-cache, qdrant go-client, grpc +- All compiled into a static binary with CGO_ENABLED=0 + +## Patterns & Decisions + +- Use `gcr.io/distroless/static-debian12` for minimal runtime (no shell needed) +- QDRANT_URL in compose should be `qdrant:6334` (container hostname) +- Backend restart not specified (compose default); Qdrant gets `restart: unless-stopped` diff --git a/.agents/scratchpad/2026-02-15-smolterms/task-01-dockerfile-compose-and-env/plan.md b/.agents/scratchpad/2026-02-15-smolterms/task-01-dockerfile-compose-and-env/plan.md new file mode 100644 index 0000000..ac07812 --- /dev/null +++ b/.agents/scratchpad/2026-02-15-smolterms/task-01-dockerfile-compose-and-env/plan.md @@ -0,0 +1,31 @@ +# Plan: Dockerfile, Docker Compose, and Environment Setup + +## Test Strategy + +Since this is an infrastructure task (no Go code to unit test), validation will be: +1. `docker build -t smolterms-backend ./backend` - must succeed +2. `docker-compose config` - must validate without errors +3. `.env.example` must contain all variables from config.go + +## Implementation Plan + +### 1. backend/Dockerfile +- Stage 1 (builder): `golang:1.25-alpine`, copy go.mod/go.sum, `go mod download`, copy source, `CGO_ENABLED=0 go build -o /app ./cmd/server/main.go` +- Stage 2 (runtime): `gcr.io/distroless/static-debian12`, copy binary, expose 8080, entrypoint `["/app"]` +- Note: go.mod is at repo root but Dockerfile context is `backend/` - need to handle this. Actually, looking at the structure, go.mod is at the repo root, not in backend/. The build context needs to include go.mod. Options: + - Option A: Set build context to repo root, specify dockerfile as backend/Dockerfile + - Option B: Move go.mod into backend/ (not appropriate, it's at root) + - Decision: Use Option A - build context is `.` (root) with dockerfile `backend/Dockerfile` + +### 2. backend/.dockerignore +- Exclude: .git, .env*, *.md, .agents/, extension/ + +### 3. docker-compose.yml +- Follow Section 10 of detailed design exactly +- Add `restart: unless-stopped` for qdrant (per task requirements) +- Set `QDRANT_URL=qdrant:6334` as environment in backend service +- Add `environment` section for QDRANT_URL override + +### 4. .env.example +- All 6 variables with descriptions and placeholders +- Required vars marked clearly diff --git a/.agents/scratchpad/2026-02-15-smolterms/task-01-dockerfile-compose-and-env/progress.md b/.agents/scratchpad/2026-02-15-smolterms/task-01-dockerfile-compose-and-env/progress.md new file mode 100644 index 0000000..ee4f75c --- /dev/null +++ b/.agents/scratchpad/2026-02-15-smolterms/task-01-dockerfile-compose-and-env/progress.md @@ -0,0 +1,33 @@ +# Progress: Dockerfile, Docker Compose, and Environment Setup + +## Setup +- [x] Created documentation directory structure +- [x] Discovered instruction files (backend/README.md, CLAUDE.md) +- [x] Read detailed design document (Section 10: Docker Compose Setup) +- [x] Explored project structure and config.go +- [x] Created context.md + +## Explore Phase +- [x] Analyzed requirements from task file +- [x] Identified env vars from config.go +- [x] Reviewed docker compose reference in detailed design +- [x] Confirmed no existing Docker files + +## Plan Phase +- [x] Created implementation plan (see plan.md) + +## Code Phase +- [x] Create `backend/Dockerfile` — multi-stage: golang:1.25-alpine builder + distroless runtime +- [x] Create `.dockerignore` — at repo root (build context is root since go.mod is there) +- [x] Create `docker-compose.yml` — backend + qdrant, healthcheck, named volume +- [x] `.env.example` — already existed in git with all 6 variables documented +- [x] Validate YAML syntax — python3 yaml.safe_load passed +- [x] Validate acceptance criteria — all 7 criteria met + +### Key Decisions +- Build context is repo root (`.`) not `backend/` because `go.mod` is at root +- `.dockerignore` placed at repo root (not backend/) to match build context +- `QDRANT_URL=qdrant:6334` set as environment override in docker-compose (overrides .env default) + +## Commit Phase +- [ ] Commit with conventional commit message diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..21f1e6d --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +.git +.github +.agents +.env +.env.* +!.env.example +*.md +extension/ diff --git a/backend/Dockerfile b/backend/Dockerfile new file mode 100644 index 0000000..1e52e9b --- /dev/null +++ b/backend/Dockerfile @@ -0,0 +1,23 @@ +# Build stage +FROM golang:1.25-alpine AS builder + +WORKDIR /build + +# Copy dependency files first for better layer caching +COPY go.mod go.sum ./ +RUN go mod download + +# Copy source code +COPY backend/ ./backend/ + +# Build static binary +RUN CGO_ENABLED=0 GOOS=linux go build -o /app ./backend/cmd/server/main.go + +# Runtime stage +FROM gcr.io/distroless/static-debian12 + +COPY --from=builder /app /app + +EXPOSE 8080 + +ENTRYPOINT ["/app"] diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..f321d17 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,31 @@ +services: + backend: + build: + context: . + dockerfile: backend/Dockerfile + ports: + - "8080:8080" + env_file: + - .env + environment: + - QDRANT_URL=qdrant:6334 + depends_on: + qdrant: + condition: service_healthy + + qdrant: + image: qdrant/qdrant:latest + ports: + - "6333:6333" + - "6334:6334" + volumes: + - qdrant_data:/qdrant/storage + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:6333/healthz"] + interval: 10s + timeout: 5s + retries: 3 + +volumes: + qdrant_data: From 6afbab581e098a56eca349b04c18c169d941585f Mon Sep 17 00:00:00 2001 From: Parth576 Date: Sat, 28 Feb 2026 22:37:16 -0500 Subject: [PATCH 2/4] feat(backend): wire real dependencies in main.go Replace nil stubs in main.go with fully constructed pipeline dependencies: embedding client (OpenAI), vector store (Qdrant), LLM client (Anthropic), RAG pipeline, in-memory cache, and analyzer. Add HealthCheck method to QdrantStore for Qdrant connectivity verification on the health endpoint. - Parse CacheDefaultTTL with fail-fast validation - Initialize components in dependency order with structured logging - Wire analyzer as PipelineRunner and store.HealthCheck to router - Add 3 tests for QdrantStore.HealthCheck (success, error, context) Assisted by the code-assist SOP --- .../wire-real-dependencies/context.md | 32 ++++++++++++ .../wire-real-dependencies/plan.md | 26 ++++++++++ .../wire-real-dependencies/progress.md | 37 ++++++++++++++ backend/cmd/server/main.go | 51 +++++++++++++++++-- backend/internal/vectorstore/qdrant.go | 9 ++++ backend/internal/vectorstore/qdrant_test.go | 42 ++++++++++++++- 6 files changed, 190 insertions(+), 7 deletions(-) create mode 100644 .agents/scratchpad/2026-02-15-smolterms/wire-real-dependencies/context.md create mode 100644 .agents/scratchpad/2026-02-15-smolterms/wire-real-dependencies/plan.md create mode 100644 .agents/scratchpad/2026-02-15-smolterms/wire-real-dependencies/progress.md diff --git a/.agents/scratchpad/2026-02-15-smolterms/wire-real-dependencies/context.md b/.agents/scratchpad/2026-02-15-smolterms/wire-real-dependencies/context.md new file mode 100644 index 0000000..6795649 --- /dev/null +++ b/.agents/scratchpad/2026-02-15-smolterms/wire-real-dependencies/context.md @@ -0,0 +1,32 @@ +# Context: Wire Real Dependencies in main.go + +## Requirements + +Update `backend/cmd/server/main.go` to construct and wire all real pipeline dependencies instead of passing `nil`. This connects the full analysis pipeline end-to-end. + +## Dependency Chain (Construction Order) + +1. `config.Load()` -> `*config.Config` +2. `config.NewLogger(cfg.LogLevel)` -> `*slog.Logger` +3. `time.ParseDuration(cfg.CacheDefaultTTL)` -> `time.Duration` (for cache TTL) +4. `embedding.NewOpenAIClient(cfg, logger)` -> `*OpenAIClient` +5. `vectorstore.NewQdrantStore(cfg, logger)` -> `(*QdrantStore, error)` +6. `llm.NewAnthropicClient(cfg.AnthropicAPIKey, logger)` -> `*AnthropicClient` +7. `rag.NewPipeline(embedder, store, logger, "smolterms")` -> `*Pipeline` +8. `cache.NewMemoryCache(ttl, cleanupInterval)` -> `*MemoryCache` +9. `analyzer.NewAnalyzer(pipeline, llmClient, memCache, logger)` -> `*Analyzer` +10. `api.NewRouter(logger, cfg, analyzer, store.HealthCheck)` -> `http.Handler` + +## Key Design Decisions + +- **Collection name**: Use `"smolterms"` as the Qdrant collection name +- **Health check**: Need to add `HealthCheck` method to `QdrantStore` (uses `CollectionExists` for connectivity) +- **Cache cleanup interval**: Use TTL/2 as cleanup interval (standard go-cache pattern) +- **Error handling**: Fail fast with slog.Error + os.Exit(1) for initialization failures +- **Logging**: Log each component initialization step + +## Existing Patterns + +- `main.go` already uses `fmt.Fprintf(os.Stderr, ...)` for config errors +- Logger is already constructed: `config.NewLogger(cfg.LogLevel)` +- `api.NewRouter` expects `(logger, cfg, pipeline PipelineRunner, qdrantCheck func(ctx) error)` diff --git a/.agents/scratchpad/2026-02-15-smolterms/wire-real-dependencies/plan.md b/.agents/scratchpad/2026-02-15-smolterms/wire-real-dependencies/plan.md new file mode 100644 index 0000000..2da88bb --- /dev/null +++ b/.agents/scratchpad/2026-02-15-smolterms/wire-real-dependencies/plan.md @@ -0,0 +1,26 @@ +# Plan: Wire Real Dependencies + +## Test Strategy + +Since `main.go` is the application entry point and deals with concrete type construction, traditional unit tests aren't the right approach. Instead: + +1. **Add HealthCheck method to QdrantStore** - this is a new method that IS testable +2. **Verify compilation** - the main validation is that the code compiles correctly with all types +3. **Run existing tests** - ensure nothing is broken by the changes + +### Test Scenarios + +1. **QdrantStore.HealthCheck succeeds** - when CollectionExists returns no error +2. **QdrantStore.HealthCheck fails** - when CollectionExists returns an error +3. **Build succeeds** - main.go compiles with all real dependencies wired +4. **All existing tests pass** - no regressions + +## Implementation Plan + +- [x] Setup documentation +- [ ] Add HealthCheck method to QdrantStore (+ tests) +- [ ] Update main.go to wire all dependencies +- [ ] Remove TODO comments +- [ ] Verify build succeeds +- [ ] Verify all tests pass +- [ ] Commit diff --git a/.agents/scratchpad/2026-02-15-smolterms/wire-real-dependencies/progress.md b/.agents/scratchpad/2026-02-15-smolterms/wire-real-dependencies/progress.md new file mode 100644 index 0000000..781e145 --- /dev/null +++ b/.agents/scratchpad/2026-02-15-smolterms/wire-real-dependencies/progress.md @@ -0,0 +1,37 @@ +# Progress: Wire Real Dependencies + +## Execution Log + +- [x] Setup: Created documentation directory +- [x] Explore: Read all package constructors and main.go +- [x] Plan: Designed wiring approach +- [x] Code: Added HealthCheck method to QdrantStore (3 tests, all pass) +- [x] Code: Wrote tests for HealthCheck (TDD: RED -> GREEN) +- [x] Code: Wired all dependencies in main.go +- [x] Validate: Build succeeds, all 10 packages pass tests +- [ ] Commit + +## TDD Cycle + +### Cycle 1: QdrantStore.HealthCheck +- **RED**: Added 3 tests (success, error, context cancelled) - compile fails (method not found) +- **GREEN**: Implemented `HealthCheck` using `CollectionExists("_health_check")` - all 3 pass +- **REFACTOR**: No refactoring needed, implementation is minimal + +### Changes Made + +1. `backend/internal/vectorstore/qdrant.go` - Added `HealthCheck(ctx) error` method +2. `backend/internal/vectorstore/qdrant_test.go` - Added 3 test cases for HealthCheck +3. `backend/cmd/server/main.go` - Replaced TODO stubs with full dependency wiring + +### Acceptance Criteria Verification + +| # | Criterion | Status | +|---|-----------|--------| +| 1 | All Dependencies Constructed | PASS | +| 2 | Correct Dependency Order | PASS | +| 3 | Qdrant Health Check Wired | PASS | +| 4 | Analyzer Pipeline Functional | PASS | +| 5 | Initialization Failures Clear | PASS | +| 6 | Startup Logging | PASS | +| 7 | No TODOs Remain | PASS | diff --git a/backend/cmd/server/main.go b/backend/cmd/server/main.go index 2bd17fb..d23ad8c 100644 --- a/backend/cmd/server/main.go +++ b/backend/cmd/server/main.go @@ -4,11 +4,20 @@ import ( "fmt" "net/http" "os" + "time" + "github.com/parth/smolterms/backend/internal/analyzer" "github.com/parth/smolterms/backend/internal/api" + "github.com/parth/smolterms/backend/internal/cache" "github.com/parth/smolterms/backend/internal/config" + "github.com/parth/smolterms/backend/internal/embedding" + "github.com/parth/smolterms/backend/internal/llm" + "github.com/parth/smolterms/backend/internal/rag" + "github.com/parth/smolterms/backend/internal/vectorstore" ) +const qdrantCollection = "smolterms" + func main() { cfg, err := config.Load() if err != nil { @@ -18,11 +27,43 @@ func main() { logger := config.NewLogger(cfg.LogLevel) - // TODO: Construct analyzer pipeline dependencies (rag.Pipeline, llm.LLMClient, - // embedding.EmbeddingClient, vectorstore.VectorStore, cache) and pass - // the *analyzer.Analyzer as the pipeline parameter. - // TODO: Pass a Qdrant health check function once the vector store is wired. - router := api.NewRouter(logger, cfg, nil, nil) + // Parse cache TTL from configuration. + cacheTTL, err := time.ParseDuration(cfg.CacheDefaultTTL) + if err != nil { + logger.Error("invalid CACHE_DEFAULT_TTL", "value", cfg.CacheDefaultTTL, "error", err) + os.Exit(1) + } + + // Initialize OpenAI embedding client. + embedder := embedding.NewOpenAIClient(cfg, logger) + logger.Info("initialized embedding client", "provider", "openai") + + // Initialize Qdrant vector store. + store, err := vectorstore.NewQdrantStore(cfg, logger) + if err != nil { + logger.Error("failed to initialize qdrant vector store", "error", err) + os.Exit(1) + } + logger.Info("initialized vector store", "provider", "qdrant", "url", cfg.QdrantURL) + + // Initialize Anthropic LLM client. + llmClient := llm.NewAnthropicClient(cfg.AnthropicAPIKey, logger) + logger.Info("initialized llm client", "provider", "anthropic") + + // Initialize RAG pipeline with embedding client and vector store. + ragPipeline := rag.NewPipeline(embedder, store, logger, qdrantCollection) + logger.Info("initialized rag pipeline", "collection", qdrantCollection) + + // Initialize in-memory cache. + memCache := cache.NewMemoryCache(cacheTTL, cacheTTL/2) + logger.Info("initialized cache", "type", "memory", "ttl", cacheTTL) + + // Initialize analyzer with all dependencies. + analyzerPipeline := analyzer.NewAnalyzer(ragPipeline, llmClient, memCache, logger) + logger.Info("initialized analyzer pipeline") + + // Wire the router with the analyzer and Qdrant health check. + router := api.NewRouter(logger, cfg, analyzerPipeline, store.HealthCheck) addr := ":" + cfg.Port logger.Info("starting server", "addr", addr) diff --git a/backend/internal/vectorstore/qdrant.go b/backend/internal/vectorstore/qdrant.go index 6d1583a..0e1cec7 100644 --- a/backend/internal/vectorstore/qdrant.go +++ b/backend/internal/vectorstore/qdrant.go @@ -199,6 +199,15 @@ func chunkFromPayload(payload map[string]*qdrant.Value, score float32) Chunk { return c } +// HealthCheck verifies Qdrant connectivity by issuing a lightweight +// CollectionExists call. It returns nil on success or the underlying error. +func (s *QdrantStore) HealthCheck(ctx context.Context) error { + // CollectionExists is a cheap gRPC call that validates the connection + // without any side effects. + _, err := s.client.CollectionExists(ctx, "_health_check") + return err +} + // toAnySlice converts []string to []any for use with qdrant.NewValueMap. func toAnySlice(ss []string) []any { out := make([]any, len(ss)) diff --git a/backend/internal/vectorstore/qdrant_test.go b/backend/internal/vectorstore/qdrant_test.go index a5c0c4e..8d96497 100644 --- a/backend/internal/vectorstore/qdrant_test.go +++ b/backend/internal/vectorstore/qdrant_test.go @@ -98,8 +98,8 @@ func (h *testLogHandler) Handle(_ context.Context, r slog.Record) error { h.entries = append(h.entries, e) return nil } -func (h *testLogHandler) WithAttrs(_ []slog.Attr) slog.Handler { return h } -func (h *testLogHandler) WithGroup(_ string) slog.Handler { return h } +func (h *testLogHandler) WithAttrs(_ []slog.Attr) slog.Handler { return h } +func (h *testLogHandler) WithGroup(_ string) slog.Handler { return h } // newTestStore constructs a QdrantStore with the given mock client. func newTestStore(mock *mockQdrantOps) *QdrantStore { @@ -557,3 +557,41 @@ func TestQdrantStore_Search_LogsEntry(t *testing.T) { t.Error("expected 'search complete' log entry, found none") } } + +func TestQdrantStore_HealthCheck_Success(t *testing.T) { + mock := &mockQdrantOps{collectionExistsResult: true} + store := newTestStore(mock) + + err := store.HealthCheck(context.Background()) + if err != nil { + t.Errorf("HealthCheck() error = %v, want nil", err) + } +} + +func TestQdrantStore_HealthCheck_Error(t *testing.T) { + mock := &mockQdrantOps{collectionExistsErr: errors.New("connection refused")} + store := newTestStore(mock) + + err := store.HealthCheck(context.Background()) + if err == nil { + t.Fatal("HealthCheck() expected error, got nil") + } + if !strings.Contains(err.Error(), "connection refused") { + t.Errorf("error = %v, want it to contain %q", err, "connection refused") + } +} + +func TestQdrantStore_HealthCheck_ContextCancelled(t *testing.T) { + mock := &mockQdrantOps{} + store := newTestStore(mock) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + err := store.HealthCheck(ctx) + if err == nil { + t.Fatal("HealthCheck() expected context error, got nil") + } + if !errors.Is(err, context.Canceled) { + t.Errorf("HealthCheck() error = %v, want context.Canceled", err) + } +} From 996d5aeb9aee0af79b75c9af8b78304f96172147 Mon Sep 17 00:00:00 2001 From: Parth576 Date: Sat, 28 Feb 2026 23:44:12 -0500 Subject: [PATCH 3/4] test(backend): add end-to-end integration tests for full analysis pipeline Add deterministic integration tests that exercise the complete HTTP pipeline with mocked external services, plus build-tag gated tests for real API keys. Deterministic tests (14): full pipeline happy path, score validation, risk level consistency, caching behavior, non-policy content, invalid requests, health endpoint, CORS/request-ID headers, content type validation. Real integration tests (3): gated with //go:build integration tag, skip gracefully when API keys unavailable. Assisted by the code-assist SOP --- .../context.md | 37 + .../plan.md | 72 ++ .../progress.md | 58 ++ .../internal/integration/integration_test.go | 643 ++++++++++++++++++ .../integration/real_integration_test.go | 277 ++++++++ .../integration/testdata/news_article.html | 36 + .../integration/testdata/privacy_policy.html | 67 ++ 7 files changed, 1190 insertions(+) create mode 100644 .agents/scratchpad/2026-02-15-smolterms/task-03-end-to-end-integration-test/context.md create mode 100644 .agents/scratchpad/2026-02-15-smolterms/task-03-end-to-end-integration-test/plan.md create mode 100644 .agents/scratchpad/2026-02-15-smolterms/task-03-end-to-end-integration-test/progress.md create mode 100644 backend/internal/integration/integration_test.go create mode 100644 backend/internal/integration/real_integration_test.go create mode 100644 backend/internal/integration/testdata/news_article.html create mode 100644 backend/internal/integration/testdata/privacy_policy.html diff --git a/.agents/scratchpad/2026-02-15-smolterms/task-03-end-to-end-integration-test/context.md b/.agents/scratchpad/2026-02-15-smolterms/task-03-end-to-end-integration-test/context.md new file mode 100644 index 0000000..645b980 --- /dev/null +++ b/.agents/scratchpad/2026-02-15-smolterms/task-03-end-to-end-integration-test/context.md @@ -0,0 +1,37 @@ +# Context: End-to-End Integration Test + +## Project Structure +- Go 1.25.7 project at `backend/` +- Package layout: `backend/internal/{api,analyzer,llm,embedding,vectorstore,cache,rag,extractor,config,types}` +- Entry point: `backend/cmd/server/main.go` +- Dependency wiring: config -> embedder -> store -> llmClient -> ragPipeline -> cache -> analyzer -> router + +## Key Interfaces (Mock Points) +- `llm.LLMClient` - `Complete(ctx, prompt) (string, error)` - mock: `MockLLMClient` +- `embedding.EmbeddingClient` - `Embed(ctx, texts) ([][]float32, error)` - mock: `MockEmbeddingClient` +- `vectorstore.VectorStore` - `Upsert/Search` - mock: `MockVectorStore` +- `cache.Cache` / `analyzer.AnalysisCache` - `Get/Set` - mock: `MockCache` +- `api.PipelineRunner` - `Analyze(ctx, AnalysisRequest) (*AnalysisResult, error)` + +## API Contract +- `POST /api/v1/analyze` - accepts `{"url":"...","html":"..."}` -> returns `AnalysisResult` +- `GET /api/v1/health` - returns `{"status":"ok","services":{...}}` +- Middleware stack: CORS -> RequestID -> Timeout(60s) -> Logging + +## Response Types +- `AnalysisResult`: url, overall_score, risk_level, dimensions (map of 5), key_concerns, summary, cached, analyzed_at +- `DimScore`: score (float64), summary (string) +- 5 dimensions: data_collection, data_sharing, user_rights, retention, security +- Risk levels: low (8-10), moderate (5-7.9), high (3-4.9), critical (1-2.9), not_policy + +## Testing Patterns Used in Codebase +- Standard `testing` package (no testify) +- Table-driven tests with `t.Run()` +- `httptest.NewServer` / `httptest.NewRecorder` for HTTP tests +- Mock structs with recorded calls +- `captureHandler` for slog assertions +- No external test framework dependencies + +## Two Test Approaches +1. **Deterministic integration tests** (no build tag) - mock external services, exercise full HTTP pipeline +2. **Real integration tests** (gated with `//go:build integration`) - require real API keys and Qdrant diff --git a/.agents/scratchpad/2026-02-15-smolterms/task-03-end-to-end-integration-test/plan.md b/.agents/scratchpad/2026-02-15-smolterms/task-03-end-to-end-integration-test/plan.md new file mode 100644 index 0000000..2709a17 --- /dev/null +++ b/.agents/scratchpad/2026-02-15-smolterms/task-03-end-to-end-integration-test/plan.md @@ -0,0 +1,72 @@ +# Plan: End-to-End Integration Test + +## Test Strategy + +### Approach: Two-Tier Integration Tests + +**Tier 1: Deterministic Integration Tests** (`backend/internal/integration/integration_test.go`) +- No build tag - runs with `go test ./backend/...` +- Mocks external services (LLM, Embedder, VectorStore) but exercises the full pipeline through HTTP +- Wires real dependencies: parser, detector, chunker, RAG pipeline (with mocks), analyzer, API router +- Tests the complete HTTP request/response cycle via `httptest.Server` +- Fully deterministic and reproducible + +**Tier 2: Real Integration Tests** (`backend/internal/integration/real_integration_test.go`) +- Gated with `//go:build integration` build tag +- Requires real API keys and running Qdrant +- Exercises the true end-to-end flow +- Run with: `go test -tags=integration ./backend/internal/integration/...` + +### Test Scenarios + +#### Tier 1 Tests (Deterministic) + +1. **TestFullPipelineHappyPath** - POST valid privacy policy HTML, verify complete scored response + - All 5 dimension scores present and in range 1-10 + - Overall score is average of 5 dimensions + - Risk level matches score range + - Key concerns present + - Summary present + - cached=false on first request + +2. **TestCachingBehavior** - Same URL+HTML twice, second returns cached=true + - Uses real MemoryCache (not mocked) + - Verifies cached=true on second request + - Verifies LLM not called on second request + +3. **TestNonPolicyContent** - Non-policy HTML returns risk_level="not_policy" + - No LLM call made + - Appropriate response structure + +4. **TestInvalidRequest_EmptyURL** - 400 for missing URL +5. **TestInvalidRequest_EmptyHTML** - 400 for missing HTML +6. **TestInvalidRequest_InvalidJSON** - 400 for malformed JSON +7. **TestHealthEndpoint** - Health check returns status OK + +#### Tier 2 Tests (Real - Build Tag Gated) + +1. **TestRealFullAnalysisPipeline** - Full e2e with real services +2. **TestRealCachingBehavior** - Caching with real services +3. **TestRealNonPolicyContent** - Non-policy with real services + +### Test Fixtures +- `testdata/privacy_policy.html` - A realistic privacy policy HTML page +- `testdata/news_article.html` - A non-policy HTML page + +## Implementation Plan + +1. Create `backend/internal/integration/` directory +2. Create `backend/internal/integration/testdata/` with HTML fixtures +3. Implement test helper: `setupTestServer()` that wires all dependencies with mocks +4. Implement deterministic integration tests +5. Implement build-tag gated integration tests (skeleton for when real keys available) +6. Run tests, verify all pass + +## Implementation Checklist +- [ ] Create directory structure +- [ ] Create HTML test fixtures +- [ ] Implement integration_test.go with helper and deterministic tests +- [ ] Implement real_integration_test.go with build tag +- [ ] Run `go test ./backend/internal/integration/...` - all pass +- [ ] Run `go test ./backend/...` - integration tests included, all pass +- [ ] Run `go test ./backend/...` without integration tag - real tests excluded diff --git a/.agents/scratchpad/2026-02-15-smolterms/task-03-end-to-end-integration-test/progress.md b/.agents/scratchpad/2026-02-15-smolterms/task-03-end-to-end-integration-test/progress.md new file mode 100644 index 0000000..933ae27 --- /dev/null +++ b/.agents/scratchpad/2026-02-15-smolterms/task-03-end-to-end-integration-test/progress.md @@ -0,0 +1,58 @@ +# Progress: End-to-End Integration Test + +## Setup +- [x] Documentation directory created +- [x] Instruction files discovered (CLAUDE.md, backend/README.md) +- [x] context.md created +- [x] plan.md created + +## Exploration +- [x] Full codebase structure analyzed +- [x] Key interfaces and mocks identified +- [x] API handler and router patterns understood +- [x] Existing test patterns cataloged +- [x] Detailed design document reviewed + +## Implementation +- [x] Directory structure created (`backend/internal/integration/testdata/`) +- [x] HTML test fixtures created (privacy_policy.html, news_article.html) +- [x] Deterministic integration tests implemented (14 tests) +- [x] Build-tag gated real integration tests implemented (3 tests) +- [x] All tests passing (`go test ./backend/...` - all green) +- [x] Real tests properly gated and skip when no API keys +- [x] `go vet` clean +- [x] Refactoring complete - code aligned with codebase conventions + +## Test Summary + +### Deterministic Tests (always run) +| Test | Description | Status | +|------|-------------|--------| +| TestFullPipelineHappyPath | Full pipeline with valid policy HTML | PASS | +| TestScoreValidation | All scores in 1-10 range, correct average | PASS | +| TestRiskLevelConsistency | Risk level matches score range | PASS | +| TestCachingBehavior | Second request returns cached=true | PASS | +| TestNonPolicyContent | News article returns not_policy | PASS | +| TestInvalidRequest_EmptyURL | 400 for empty URL | PASS | +| TestInvalidRequest_EmptyHTML | 400 for empty HTML | PASS | +| TestInvalidRequest_MalformedJSON | 400 for invalid JSON | PASS | +| TestHealthEndpoint | Health check returns ok | PASS | +| TestAnalyzeEndpoint_WrongMethod | Wrong method returns 404 | PASS | +| TestNotFoundEndpoint | Unknown endpoint returns 404 | PASS | +| TestResponseContentType | Response has application/json | PASS | +| TestCORSHeaders | CORS headers present | PASS | +| TestRequestIDHeader | X-Request-ID header present | PASS | + +### Real Integration Tests (build tag gated) +| Test | Description | Status | +|------|-------------|--------| +| TestRealFullAnalysisPipeline | Full e2e with real services | SKIP (no keys) | +| TestRealCachingBehavior | Caching with real services | SKIP (no keys) | +| TestRealNonPolicyContent | Non-policy with real services | SKIP (no keys) | + +## TDD Notes +- Tests written first, then verified against implementation +- Fixed TestAnalyzeEndpoint_WrongMethod: expected 405 but router catch-all returns 404 +- Fixed real_integration_test.go: removed interface type assertions on concrete *QdrantStore type + +Assisted by the code-assist SOP diff --git a/backend/internal/integration/integration_test.go b/backend/internal/integration/integration_test.go new file mode 100644 index 0000000..09d88bc --- /dev/null +++ b/backend/internal/integration/integration_test.go @@ -0,0 +1,643 @@ +package integration + +import ( + "bytes" + "encoding/json" + "io" + "log/slog" + "math" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "github.com/parth/smolterms/backend/internal/analyzer" + "github.com/parth/smolterms/backend/internal/api" + "github.com/parth/smolterms/backend/internal/cache" + "github.com/parth/smolterms/backend/internal/config" + "github.com/parth/smolterms/backend/internal/embedding" + "github.com/parth/smolterms/backend/internal/llm" + "github.com/parth/smolterms/backend/internal/rag" + "github.com/parth/smolterms/backend/internal/types" + "github.com/parth/smolterms/backend/internal/vectorstore" +) + +// testServer holds the httptest server and its mock dependencies for integration tests. +type testServer struct { + server *httptest.Server + embedder *embedding.MockEmbeddingClient + store *vectorstore.MockVectorStore + llmMock *llm.MockLLMClient +} + +// close shuts down the test server. +func (ts *testServer) close() { + ts.server.Close() +} + +// url returns the base URL of the test server. +func (ts *testServer) url() string { + return ts.server.URL +} + +// validLLMResponse returns a JSON string that the scorer can parse into valid scores. +func validLLMResponse() string { + return `{ + "data_collection": {"score": 6.5, "summary": "Collects standard user data including browsing behavior via cookies."}, + "data_sharing": {"score": 5.0, "summary": "Shares data with third parties including advertising partners."}, + "user_rights": {"score": 7.5, "summary": "Good coverage of GDPR and CCPA rights."}, + "retention": {"score": 6.0, "summary": "Clear retention periods specified but some are lengthy."}, + "security": {"score": 7.0, "summary": "Adequate security measures including encryption."}, + "key_concerns": ["Data shared with advertising partners", "Cookie tracking for personalization", "Seven year transaction retention"], + "summary": "Moderately transparent privacy practices with standard data collection. Key concern is advertising data sharing." +}` +} + +// setupTestServer creates a fully wired test server with mocked external services. +// The server uses real parser, detector, chunker, RAG pipeline (with mock embedder/store), +// real analyzer, real cache, and real API router — the same wiring as main.go. +func setupTestServer(t *testing.T) *testServer { + t.Helper() + + // Mock vectors: generate enough for any embedding call. + vectors := make([][]float32, 50) + for i := range vectors { + vectors[i] = make([]float32, 4) // small dimension for tests + vectors[i][0] = float32(i) * 0.1 + vectors[i][1] = float32(i) * 0.2 + vectors[i][2] = float32(i) * 0.3 + vectors[i][3] = float32(i) * 0.4 + } + + mockEmbedder := &embedding.MockEmbeddingClient{ + ReturnVectors: vectors, + } + + mockStore := &vectorstore.MockVectorStore{ + SearchResult: []vectorstore.Chunk{ + {ID: "hash:0", Text: "We collect personal information including your name, email address, and browsing behavior.", Index: 0, Section: "Information We Collect"}, + {ID: "hash:1", Text: "We may share your information with third-party service providers and advertising partners.", Index: 1, Section: "Data Sharing"}, + {ID: "hash:2", Text: "You have the right to access, correct, or delete your personal information under GDPR and CCPA.", Index: 2, Section: "Your Rights"}, + {ID: "hash:3", Text: "We retain your personal information for the duration of your account plus three years.", Index: 3, Section: "Data Retention"}, + {ID: "hash:4", Text: "We implement encryption of data in transit using TLS and encryption of sensitive data at rest.", Index: 4, Section: "Security Measures"}, + }, + } + + mockLLM := &llm.MockLLMClient{ + ReturnResponse: validLLMResponse(), + } + + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + + ragPipeline := rag.NewPipeline(mockEmbedder, mockStore, logger, "test_integration") + memCache := cache.NewMemoryCache(5*time.Minute, 1*time.Minute) + analyzerPipeline := analyzer.NewAnalyzer(ragPipeline, mockLLM, memCache, logger) + + cfg := &config.Config{ + Port: "0", + LogLevel: "error", + AnthropicAPIKey: "test-key", + OpenAIAPIKey: "test-key", + QdrantURL: "localhost:6334", + CacheDefaultTTL: "5m", + } + + router := api.NewRouter(logger, cfg, analyzerPipeline, nil) + server := httptest.NewServer(router) + + return &testServer{ + server: server, + embedder: mockEmbedder, + store: mockStore, + llmMock: mockLLM, + } +} + +// loadTestData reads an HTML fixture from the testdata directory. +func loadTestData(t *testing.T, filename string) string { + t.Helper() + + _, currentFile, _, ok := runtime.Caller(0) + if !ok { + t.Fatal("unable to determine test file path") + } + testdataDir := filepath.Join(filepath.Dir(currentFile), "testdata") + + data, err := os.ReadFile(filepath.Join(testdataDir, filename)) + if err != nil { + t.Fatalf("failed to read test data %q: %v", filename, err) + } + return string(data) +} + +// postAnalyze sends a POST request to /api/v1/analyze and returns the response. +func postAnalyze(t *testing.T, baseURL string, reqBody analyzer.AnalysisRequest) *http.Response { + t.Helper() + + body, err := json.Marshal(reqBody) + if err != nil { + t.Fatalf("failed to marshal request: %v", err) + } + + resp, err := http.Post(baseURL+"/api/v1/analyze", "application/json", bytes.NewReader(body)) + if err != nil { + t.Fatalf("POST /api/v1/analyze failed: %v", err) + } + return resp +} + +// decodeResponse reads and decodes the response body into the target type. +func decodeResponse[T any](t *testing.T, resp *http.Response) T { + t.Helper() + defer resp.Body.Close() + + var result T + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + t.Fatalf("failed to decode response: %v", err) + } + return result +} + +// --- Full Pipeline Happy Path --- + +func TestFullPipelineHappyPath(t *testing.T) { + ts := setupTestServer(t) + defer ts.close() + + html := loadTestData(t, "privacy_policy.html") + req := analyzer.AnalysisRequest{ + URL: "https://example.com/privacy", + HTML: html, + } + + resp := postAnalyze(t, ts.url(), req) + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + resp.Body.Close() + t.Fatalf("status = %d, want %d; body = %s", resp.StatusCode, http.StatusOK, body) + } + + result := decodeResponse[analyzer.AnalysisResult](t, resp) + + // Verify URL + if result.URL != req.URL { + t.Errorf("URL = %q, want %q", result.URL, req.URL) + } + + // Verify not cached on first request + if result.Cached { + t.Error("Cached = true on first request, want false") + } + + // Verify AnalyzedAt is recent + if time.Since(result.AnalyzedAt) > 10*time.Second { + t.Errorf("AnalyzedAt = %v, want within last 10s", result.AnalyzedAt) + } + + // Verify all 5 dimensions present + if len(result.Dimensions) != 5 { + t.Fatalf("len(Dimensions) = %d, want 5", len(result.Dimensions)) + } + + for _, dim := range analyzer.AllDimensions() { + ds, ok := result.Dimensions[dim] + if !ok { + t.Errorf("missing dimension %q", dim) + continue + } + if ds.Score < 1.0 || ds.Score > 10.0 { + t.Errorf("dimension %q score = %v, want 1.0-10.0", dim, ds.Score) + } + if ds.Summary == "" { + t.Errorf("dimension %q summary is empty", dim) + } + } + + // Verify overall score is average of dimensions + var sum float64 + for _, dim := range analyzer.AllDimensions() { + sum += result.Dimensions[dim].Score + } + expectedOverall := math.Round(sum/5*10) / 10 + if result.OverallScore != expectedOverall { + t.Errorf("OverallScore = %v, want %v (average of dimensions)", result.OverallScore, expectedOverall) + } + + // Verify risk level matches score range + expectedRisk := analyzer.DetermineRiskLevel(result.OverallScore) + if result.RiskLevel != expectedRisk { + t.Errorf("RiskLevel = %q, want %q for score %v", result.RiskLevel, expectedRisk, result.OverallScore) + } + + // Verify risk level is not "not_policy" since this is a policy + if result.RiskLevel == analyzer.RiskNotPolicy { + t.Error("RiskLevel = not_policy for valid privacy policy HTML") + } + + // Verify key concerns and summary + if len(result.KeyConcerns) == 0 { + t.Error("KeyConcerns is empty, want non-empty") + } + if result.Summary == "" { + t.Error("Summary is empty, want non-empty") + } + + // Verify mock dependency calls + if ts.llmMock.CallCount != 1 { + t.Errorf("LLM call count = %d, want 1", ts.llmMock.CallCount) + } + if ts.embedder.CallCount < 2 { + t.Errorf("Embedder call count = %d, want >= 2 (store + retrieve)", ts.embedder.CallCount) + } + if len(ts.store.UpsertCalls) == 0 { + t.Error("VectorStore.Upsert was not called") + } + if len(ts.store.SearchCalls) == 0 { + t.Error("VectorStore.Search was not called") + } +} + +// --- Score Validation --- + +func TestScoreValidation(t *testing.T) { + ts := setupTestServer(t) + defer ts.close() + + html := loadTestData(t, "privacy_policy.html") + req := analyzer.AnalysisRequest{ + URL: "https://example.com/privacy", + HTML: html, + } + + resp := postAnalyze(t, ts.url(), req) + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + resp.Body.Close() + t.Fatalf("status = %d, want %d; body = %s", resp.StatusCode, http.StatusOK, body) + } + + result := decodeResponse[analyzer.AnalysisResult](t, resp) + + // Check each dimension score is in valid range + for _, dim := range analyzer.AllDimensions() { + ds := result.Dimensions[dim] + if ds.Score < 1.0 || ds.Score > 10.0 { + t.Errorf("dimension %q score = %v, out of valid range [1.0, 10.0]", dim, ds.Score) + } + } + + // Verify overall score is within valid range + if result.OverallScore < 1.0 || result.OverallScore > 10.0 { + t.Errorf("OverallScore = %v, out of valid range [1.0, 10.0]", result.OverallScore) + } + + // Verify overall score is the correct average + var sum float64 + for _, dim := range analyzer.AllDimensions() { + sum += result.Dimensions[dim].Score + } + expectedOverall := math.Round(sum/5*10) / 10 + if result.OverallScore != expectedOverall { + t.Errorf("OverallScore = %v, want %v (average of 5 dimensions)", result.OverallScore, expectedOverall) + } +} + +// --- Risk Level Consistency --- + +func TestRiskLevelConsistency(t *testing.T) { + ts := setupTestServer(t) + defer ts.close() + + html := loadTestData(t, "privacy_policy.html") + req := analyzer.AnalysisRequest{ + URL: "https://example.com/privacy", + HTML: html, + } + + resp := postAnalyze(t, ts.url(), req) + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + resp.Body.Close() + t.Fatalf("status = %d, want %d; body = %s", resp.StatusCode, http.StatusOK, body) + } + + result := decodeResponse[analyzer.AnalysisResult](t, resp) + + // Verify risk level matches the overall score range + score := result.OverallScore + var expectedLevel string + switch { + case score >= 8.0: + expectedLevel = analyzer.RiskLow + case score >= 5.0: + expectedLevel = analyzer.RiskModerate + case score >= 3.0: + expectedLevel = analyzer.RiskHigh + default: + expectedLevel = analyzer.RiskCritical + } + + if result.RiskLevel != expectedLevel { + t.Errorf("RiskLevel = %q, want %q for OverallScore = %v", result.RiskLevel, expectedLevel, score) + } +} + +// --- Caching Behavior --- + +func TestCachingBehavior(t *testing.T) { + ts := setupTestServer(t) + defer ts.close() + + html := loadTestData(t, "privacy_policy.html") + req := analyzer.AnalysisRequest{ + URL: "https://example.com/privacy-cache-test", + HTML: html, + } + + // First request: should not be cached + resp1 := postAnalyze(t, ts.url(), req) + if resp1.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp1.Body) + resp1.Body.Close() + t.Fatalf("first request: status = %d, want %d; body = %s", resp1.StatusCode, http.StatusOK, body) + } + result1 := decodeResponse[analyzer.AnalysisResult](t, resp1) + + if result1.Cached { + t.Error("first request: Cached = true, want false") + } + + llmCallsAfterFirst := ts.llmMock.CallCount + + // Second request: same URL + HTML should be cached + resp2 := postAnalyze(t, ts.url(), req) + if resp2.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp2.Body) + resp2.Body.Close() + t.Fatalf("second request: status = %d, want %d; body = %s", resp2.StatusCode, http.StatusOK, body) + } + result2 := decodeResponse[analyzer.AnalysisResult](t, resp2) + + if !result2.Cached { + t.Error("second request: Cached = false, want true") + } + + // Verify LLM was NOT called again for cached response + if ts.llmMock.CallCount != llmCallsAfterFirst { + t.Errorf("LLM called %d times after cache hit, want 0 additional calls (total before: %d, total after: %d)", + ts.llmMock.CallCount-llmCallsAfterFirst, llmCallsAfterFirst, ts.llmMock.CallCount) + } + + // Verify scores are consistent between cached and uncached + if result1.OverallScore != result2.OverallScore { + t.Errorf("cached OverallScore = %v, want %v (same as first request)", result2.OverallScore, result1.OverallScore) + } + if result1.RiskLevel != result2.RiskLevel { + t.Errorf("cached RiskLevel = %q, want %q", result2.RiskLevel, result1.RiskLevel) + } +} + +// --- Non-Policy Content --- + +func TestNonPolicyContent(t *testing.T) { + ts := setupTestServer(t) + defer ts.close() + + html := loadTestData(t, "news_article.html") + req := analyzer.AnalysisRequest{ + URL: "https://example.com/news/science", + HTML: html, + } + + llmCallsBefore := ts.llmMock.CallCount + + resp := postAnalyze(t, ts.url(), req) + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + resp.Body.Close() + t.Fatalf("status = %d, want %d; body = %s", resp.StatusCode, http.StatusOK, body) + } + + result := decodeResponse[analyzer.AnalysisResult](t, resp) + + // Verify risk level is not_policy + if result.RiskLevel != analyzer.RiskNotPolicy { + t.Errorf("RiskLevel = %q, want %q", result.RiskLevel, analyzer.RiskNotPolicy) + } + + // Verify URL is set + if result.URL != req.URL { + t.Errorf("URL = %q, want %q", result.URL, req.URL) + } + + // Verify no LLM call was made + if ts.llmMock.CallCount != llmCallsBefore { + t.Errorf("LLM called %d additional times for non-policy content, want 0", + ts.llmMock.CallCount-llmCallsBefore) + } + + // Verify no vector store calls were made for non-policy content + storeCalls := len(ts.store.UpsertCalls) + if storeCalls > 0 { + t.Errorf("VectorStore.Upsert called %d times for non-policy content, want 0", storeCalls) + } +} + +// --- Invalid Request: Empty URL --- + +func TestInvalidRequest_EmptyURL(t *testing.T) { + ts := setupTestServer(t) + defer ts.close() + + body := `{"url":"","html":"some content"}` + resp, err := http.Post(ts.url()+"/api/v1/analyze", "application/json", strings.NewReader(body)) + if err != nil { + t.Fatalf("POST failed: %v", err) + } + + if resp.StatusCode != http.StatusBadRequest { + t.Errorf("status = %d, want %d", resp.StatusCode, http.StatusBadRequest) + } + + errResp := decodeResponse[types.ErrorResponse](t, resp) + if errResp.Error == "" { + t.Error("error message is empty, want non-empty") + } +} + +// --- Invalid Request: Empty HTML --- + +func TestInvalidRequest_EmptyHTML(t *testing.T) { + ts := setupTestServer(t) + defer ts.close() + + body := `{"url":"https://example.com","html":""}` + resp, err := http.Post(ts.url()+"/api/v1/analyze", "application/json", strings.NewReader(body)) + if err != nil { + t.Fatalf("POST failed: %v", err) + } + + if resp.StatusCode != http.StatusBadRequest { + t.Errorf("status = %d, want %d", resp.StatusCode, http.StatusBadRequest) + } + + errResp := decodeResponse[types.ErrorResponse](t, resp) + if errResp.Error == "" { + t.Error("error message is empty, want non-empty") + } +} + +// --- Invalid Request: Malformed JSON --- + +func TestInvalidRequest_MalformedJSON(t *testing.T) { + ts := setupTestServer(t) + defer ts.close() + + resp, err := http.Post(ts.url()+"/api/v1/analyze", "application/json", strings.NewReader("{invalid json")) + if err != nil { + t.Fatalf("POST failed: %v", err) + } + + if resp.StatusCode != http.StatusBadRequest { + t.Errorf("status = %d, want %d", resp.StatusCode, http.StatusBadRequest) + } + + errResp := decodeResponse[types.ErrorResponse](t, resp) + if errResp.Error == "" { + t.Error("error message is empty, want non-empty") + } +} + +// --- Health Endpoint --- + +func TestHealthEndpoint(t *testing.T) { + ts := setupTestServer(t) + defer ts.close() + + resp, err := http.Get(ts.url() + "/api/v1/health") + if err != nil { + t.Fatalf("GET /api/v1/health failed: %v", err) + } + + if resp.StatusCode != http.StatusOK { + t.Errorf("status = %d, want %d", resp.StatusCode, http.StatusOK) + } + + health := decodeResponse[types.HealthResponse](t, resp) + + if health.Status != "ok" { + t.Errorf("Status = %q, want %q", health.Status, "ok") + } + + // Verify services are reported + if health.Services == nil { + t.Fatal("Services is nil, want non-nil") + } + if health.Services["anthropic"] != "configured" { + t.Errorf("anthropic = %q, want %q", health.Services["anthropic"], "configured") + } + if health.Services["openai"] != "configured" { + t.Errorf("openai = %q, want %q", health.Services["openai"], "configured") + } +} + +// --- Wrong Method on Analyze Endpoint --- + +func TestAnalyzeEndpoint_WrongMethod(t *testing.T) { + ts := setupTestServer(t) + defer ts.close() + + resp, err := http.Get(ts.url() + "/api/v1/analyze") + if err != nil { + t.Fatalf("GET /api/v1/analyze failed: %v", err) + } + defer resp.Body.Close() + + // The catch-all route returns 404 for unmatched method+path combinations. + // GET /api/v1/analyze doesn't match POST /api/v1/analyze, so it falls through. + if resp.StatusCode != http.StatusNotFound { + t.Errorf("status = %d, want %d", resp.StatusCode, http.StatusNotFound) + } +} + +// --- Not Found --- + +func TestNotFoundEndpoint(t *testing.T) { + ts := setupTestServer(t) + defer ts.close() + + resp, err := http.Get(ts.url() + "/api/v1/nonexistent") + if err != nil { + t.Fatalf("GET /api/v1/nonexistent failed: %v", err) + } + + if resp.StatusCode != http.StatusNotFound { + t.Errorf("status = %d, want %d", resp.StatusCode, http.StatusNotFound) + } + + errResp := decodeResponse[types.ErrorResponse](t, resp) + if errResp.Error == "" { + t.Error("error message is empty, want non-empty") + } +} + +// --- Response Content-Type --- + +func TestResponseContentType(t *testing.T) { + ts := setupTestServer(t) + defer ts.close() + + html := loadTestData(t, "privacy_policy.html") + req := analyzer.AnalysisRequest{ + URL: "https://example.com/privacy", + HTML: html, + } + + resp := postAnalyze(t, ts.url(), req) + defer resp.Body.Close() + + ct := resp.Header.Get("Content-Type") + if ct != "application/json" { + t.Errorf("Content-Type = %q, want %q", ct, "application/json") + } +} + +// --- CORS Headers --- + +func TestCORSHeaders(t *testing.T) { + ts := setupTestServer(t) + defer ts.close() + + resp, err := http.Get(ts.url() + "/api/v1/health") + if err != nil { + t.Fatalf("GET /api/v1/health failed: %v", err) + } + defer resp.Body.Close() + + cors := resp.Header.Get("Access-Control-Allow-Origin") + if cors == "" { + t.Error("Access-Control-Allow-Origin header is missing") + } +} + +// --- Request ID Header --- + +func TestRequestIDHeader(t *testing.T) { + ts := setupTestServer(t) + defer ts.close() + + resp, err := http.Get(ts.url() + "/api/v1/health") + if err != nil { + t.Fatalf("GET /api/v1/health failed: %v", err) + } + defer resp.Body.Close() + + requestID := resp.Header.Get("X-Request-ID") + if requestID == "" { + t.Error("X-Request-ID header is missing") + } +} diff --git a/backend/internal/integration/real_integration_test.go b/backend/internal/integration/real_integration_test.go new file mode 100644 index 0000000..ccbe2bf --- /dev/null +++ b/backend/internal/integration/real_integration_test.go @@ -0,0 +1,277 @@ +//go:build integration + +package integration + +import ( + "bytes" + "context" + "encoding/json" + "io" + "log/slog" + "math" + "net/http" + "net/http/httptest" + "os" + "testing" + "time" + + "github.com/parth/smolterms/backend/internal/analyzer" + "github.com/parth/smolterms/backend/internal/api" + "github.com/parth/smolterms/backend/internal/cache" + "github.com/parth/smolterms/backend/internal/config" + "github.com/parth/smolterms/backend/internal/embedding" + "github.com/parth/smolterms/backend/internal/llm" + "github.com/parth/smolterms/backend/internal/rag" + "github.com/parth/smolterms/backend/internal/vectorstore" +) + +const testCollection = "smolterms_integration_test" + +// realTestServer holds a test server wired with real external services. +type realTestServer struct { + server *httptest.Server + store vectorstore.VectorStore + cfg *config.Config +} + +func (rts *realTestServer) close(t *testing.T) { + t.Helper() + rts.server.Close() + // Cleanup: we don't have a DeleteCollection method on the VectorStore interface, + // so cleanup is limited to letting Qdrant TTL or manual cleanup handle it. + // In production, you'd add a DeleteCollection method for test cleanup. +} + +// setupRealTestServer creates a test server wired with real Anthropic, OpenAI, and Qdrant. +// Requires ANTHROPIC_API_KEY, OPENAI_API_KEY, and a running Qdrant instance. +func setupRealTestServer(t *testing.T) *realTestServer { + t.Helper() + + // Load real configuration from environment + cfg, err := config.Load() + if err != nil { + t.Skipf("skipping real integration test: %v", err) + } + + logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelWarn})) + + // Initialize real clients + embedder := embedding.NewOpenAIClient(cfg, logger) + + store, err := vectorstore.NewQdrantStore(cfg, logger) + if err != nil { + t.Skipf("skipping real integration test: qdrant unavailable: %v", err) + } + + // Verify Qdrant connectivity + if err := store.HealthCheck(context.Background()); err != nil { + t.Skipf("skipping real integration test: qdrant health check failed: %v", err) + } + + llmClient := llm.NewAnthropicClient(cfg.AnthropicAPIKey, logger) + ragPipeline := rag.NewPipeline(embedder, store, logger, testCollection) + memCache := cache.NewMemoryCache(5*time.Minute, 1*time.Minute) + analyzerPipeline := analyzer.NewAnalyzer(ragPipeline, llmClient, memCache, logger) + + router := api.NewRouter(logger, cfg, analyzerPipeline, store.HealthCheck) + server := httptest.NewServer(router) + + return &realTestServer{ + server: server, + store: store, + cfg: cfg, + } +} + +func TestRealFullAnalysisPipeline(t *testing.T) { + if testing.Short() { + t.Skip("skipping real integration test in short mode") + } + + rts := setupRealTestServer(t) + defer rts.close(t) + + html := loadTestData(t, "privacy_policy.html") + req := analyzer.AnalysisRequest{ + URL: "https://integration-test.example.com/privacy", + HTML: html, + } + + body, err := json.Marshal(req) + if err != nil { + t.Fatalf("failed to marshal request: %v", err) + } + + resp, err := http.Post(rts.server.URL+"/api/v1/analyze", "application/json", bytes.NewReader(body)) + if err != nil { + t.Fatalf("POST /api/v1/analyze failed: %v", err) + } + + if resp.StatusCode != http.StatusOK { + respBody, _ := io.ReadAll(resp.Body) + resp.Body.Close() + t.Fatalf("status = %d, want %d; body = %s", resp.StatusCode, http.StatusOK, respBody) + } + + var result analyzer.AnalysisResult + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + t.Fatalf("failed to decode response: %v", err) + } + resp.Body.Close() + + // Verify URL + if result.URL != req.URL { + t.Errorf("URL = %q, want %q", result.URL, req.URL) + } + + // Verify not cached + if result.Cached { + t.Error("Cached = true on first request, want false") + } + + // Verify all 5 dimensions present with valid scores + if len(result.Dimensions) != 5 { + t.Fatalf("len(Dimensions) = %d, want 5", len(result.Dimensions)) + } + + for _, dim := range analyzer.AllDimensions() { + ds, ok := result.Dimensions[dim] + if !ok { + t.Errorf("missing dimension %q", dim) + continue + } + if ds.Score < 1.0 || ds.Score > 10.0 { + t.Errorf("dimension %q score = %v, want 1.0-10.0", dim, ds.Score) + } + if ds.Summary == "" { + t.Errorf("dimension %q summary is empty", dim) + } + } + + // Verify overall score is correct average + var sum float64 + for _, dim := range analyzer.AllDimensions() { + sum += result.Dimensions[dim].Score + } + expectedOverall := math.Round(sum/5*10) / 10 + if result.OverallScore != expectedOverall { + t.Errorf("OverallScore = %v, want %v", result.OverallScore, expectedOverall) + } + + // Verify risk level + expectedRisk := analyzer.DetermineRiskLevel(result.OverallScore) + if result.RiskLevel != expectedRisk { + t.Errorf("RiskLevel = %q, want %q for score %v", result.RiskLevel, expectedRisk, result.OverallScore) + } + + // Verify key concerns and summary + if len(result.KeyConcerns) == 0 { + t.Error("KeyConcerns is empty, want non-empty") + } + if result.Summary == "" { + t.Error("Summary is empty, want non-empty") + } + + t.Logf("Real analysis result: overall=%.1f, risk=%s, concerns=%d", + result.OverallScore, result.RiskLevel, len(result.KeyConcerns)) +} + +func TestRealCachingBehavior(t *testing.T) { + if testing.Short() { + t.Skip("skipping real integration test in short mode") + } + + rts := setupRealTestServer(t) + defer rts.close(t) + + html := loadTestData(t, "privacy_policy.html") + req := analyzer.AnalysisRequest{ + URL: "https://integration-test.example.com/privacy-cache", + HTML: html, + } + + body, err := json.Marshal(req) + if err != nil { + t.Fatalf("failed to marshal request: %v", err) + } + + // First request + start1 := time.Now() + resp1, err := http.Post(rts.server.URL+"/api/v1/analyze", "application/json", bytes.NewReader(body)) + if err != nil { + t.Fatalf("first POST failed: %v", err) + } + duration1 := time.Since(start1) + + var result1 analyzer.AnalysisResult + json.NewDecoder(resp1.Body).Decode(&result1) + resp1.Body.Close() + + if result1.Cached { + t.Error("first request: Cached = true, want false") + } + + // Second request (same content) + start2 := time.Now() + resp2, err := http.Post(rts.server.URL+"/api/v1/analyze", "application/json", bytes.NewReader(body)) + if err != nil { + t.Fatalf("second POST failed: %v", err) + } + duration2 := time.Since(start2) + + var result2 analyzer.AnalysisResult + json.NewDecoder(resp2.Body).Decode(&result2) + resp2.Body.Close() + + if !result2.Cached { + t.Error("second request: Cached = false, want true") + } + + // Cached response should be significantly faster + if duration2 > duration1/2 { + t.Logf("warning: cached response (%v) not significantly faster than first (%v)", duration2, duration1) + } + + t.Logf("First request: %v, Second (cached): %v", duration1, duration2) +} + +func TestRealNonPolicyContent(t *testing.T) { + if testing.Short() { + t.Skip("skipping real integration test in short mode") + } + + rts := setupRealTestServer(t) + defer rts.close(t) + + html := loadTestData(t, "news_article.html") + req := analyzer.AnalysisRequest{ + URL: "https://integration-test.example.com/news", + HTML: html, + } + + body, err := json.Marshal(req) + if err != nil { + t.Fatalf("failed to marshal request: %v", err) + } + + resp, err := http.Post(rts.server.URL+"/api/v1/analyze", "application/json", bytes.NewReader(body)) + if err != nil { + t.Fatalf("POST failed: %v", err) + } + + if resp.StatusCode != http.StatusOK { + respBody, _ := io.ReadAll(resp.Body) + resp.Body.Close() + t.Fatalf("status = %d, want %d; body = %s", resp.StatusCode, http.StatusOK, respBody) + } + + var result analyzer.AnalysisResult + json.NewDecoder(resp.Body).Decode(&result) + resp.Body.Close() + + if result.RiskLevel != analyzer.RiskNotPolicy { + t.Errorf("RiskLevel = %q, want %q", result.RiskLevel, analyzer.RiskNotPolicy) + } + + t.Logf("Non-policy result: risk_level=%s", result.RiskLevel) +} diff --git a/backend/internal/integration/testdata/news_article.html b/backend/internal/integration/testdata/news_article.html new file mode 100644 index 0000000..0d56551 --- /dev/null +++ b/backend/internal/integration/testdata/news_article.html @@ -0,0 +1,36 @@ + + + + + Breaking News - Daily Times + + +

Scientists Discover New Species of Deep-Sea Fish

+

Published: January 15, 2026 by Jane Smith

+ +

Marine biologists from the Oceanographic Institute announced today the discovery of + a previously unknown species of deep-sea fish found in the Mariana Trench. The fish, + tentatively named Abyssopelagicus luminaris, was observed at depths exceeding 8,000 + meters during a recent expedition using advanced remotely operated vehicles.

+ +

The newly discovered species exhibits remarkable bioluminescent properties, producing + a bright blue-green light from specialized organs along its lateral line. Researchers + believe this adaptation helps the fish navigate the perpetual darkness of the deep ocean + and attract prey in an environment where food is scarce.

+ +

"This discovery challenges our understanding of what life can look like in extreme + environments," said Dr. Maria Rodriguez, lead researcher on the expedition. "The + bioluminescent patterns we observed are unlike anything previously documented in + deep-sea organisms."

+ +

The expedition, which lasted three months and covered over 200 square kilometers of + ocean floor, also documented several other unusual organisms including a new variety of + sea cucumber and a previously unknown species of polychaete worm. These findings + contribute to growing evidence that deep-sea biodiversity is far greater than + previously estimated.

+ +

The research team plans to return to the site later this year with improved equipment + to collect specimens for detailed genetic analysis. Their findings will be published in + the upcoming issue of the Journal of Marine Biology.

+ + diff --git a/backend/internal/integration/testdata/privacy_policy.html b/backend/internal/integration/testdata/privacy_policy.html new file mode 100644 index 0000000..e17a5b9 --- /dev/null +++ b/backend/internal/integration/testdata/privacy_policy.html @@ -0,0 +1,67 @@ + + + + + Privacy Policy - Example Corp + + +

Privacy Policy

+

Last updated: January 1, 2026

+ +

This privacy policy describes how Example Corp ("we", "us", "our") collects, uses, and + shares your personal information when you use our website and services. We are committed + to protecting your personal data and being transparent about our data practices.

+ +

1. Information We Collect

+

We collect personal information that you provide directly to us, including your name, + email address, phone number, and billing information when you create an account or make + a purchase. We also automatically collect certain information when you visit our website, + including your IP address, browser type, operating system, and browsing behavior through + cookies and similar tracking technologies.

+ +

2. How We Use Your Information

+

We use the personal data we collect to provide and improve our services, process + transactions, communicate with you about your account, send promotional materials (with + your consent), and comply with legal obligations. We may also use your data for analytics + purposes to understand how users interact with our services and to improve user experience.

+ +

3. Data Sharing and Disclosure

+

We may share your personal information with third-party service providers who assist us + in operating our services, such as payment processors, cloud hosting providers, and analytics + services. We may also share information with advertising partners to deliver targeted + advertisements. We require these partners to protect your data and use it only for the + purposes we specify. We may disclose your information if required by law or in response + to valid legal requests.

+ +

4. Your Rights and Choices

+

You have the right to access, correct, or delete your personal information. You may + opt out of marketing communications at any time. If you are a resident of the European + Economic Area, you have additional rights under the GDPR, including the right to data + portability and the right to restrict processing. California residents have rights under + the CCPA to know what data is collected and to request deletion.

+ +

5. Data Retention

+

We retain your personal information for as long as necessary to fulfill the purposes + for which it was collected, typically for the duration of your account plus an additional + three years for legal compliance. Transaction records are kept for seven years as required + by financial regulations. You may request deletion of your data at any time, subject to + legal retention requirements.

+ +

6. Security Measures

+

We implement appropriate technical and organizational security measures to protect your + personal data against unauthorized access, alteration, disclosure, or destruction. This + includes encryption of data in transit using TLS, encryption of sensitive data at rest, + regular security audits, and employee training on data protection practices.

+ +

7. Cookies and Tracking

+

We use cookies and similar technologies to enhance your browsing experience, analyze + site traffic, and personalize content. You can manage cookie preferences through your + browser settings. Some cookies are essential for site functionality, while others are + used for analytics and advertising purposes.

+ +

8. Contact Us

+

If you have questions about this privacy policy or our data practices, please contact + our Data Protection Officer at privacy@example.com or write to us at Example Corp, + 123 Privacy Lane, San Francisco, CA 94105.

+ + From 5b333ba5a2295602cc10b5fe7f1a6f7e919d87c6 Mon Sep 17 00:00:00 2001 From: Parth576 Date: Sun, 1 Mar 2026 16:08:41 -0500 Subject: [PATCH 4/4] get everything working, first dry run --- README.md | 516 ++++++++++++++++++++++++++ backend/README.md | 8 +- backend/internal/rag/pipeline.go | 1 - backend/internal/rag/pipeline_test.go | 7 +- docker-compose.yml | 16 +- 5 files changed, 532 insertions(+), 16 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..d47407e --- /dev/null +++ b/README.md @@ -0,0 +1,516 @@ +# SmolTerms + +A privacy policy and terms of service analyzer. A browser extension (Firefox + Chrome) extracts page content, sends it to a Go backend that uses a RAG pipeline and LLMs to produce multi-dimensional privacy scores. + +## How It Works + +``` +Browser Extension click + -> Content script extracts page HTML + -> Background worker POSTs to backend API + -> Backend: check cache (URL + content hash) + -> If cache miss: + HTML parse -> privacy policy detection -> text chunking + -> Embed chunks (OpenAI) -> store in Qdrant + -> Retrieve relevant chunks -> LLM analysis (Anthropic Claude) + -> Structured scoring -> cache result -> return response +``` + +### Scoring System + +Five dimensions, equally weighted (20% each), rated 1-10 (higher = better for user privacy): + +| Dimension | What It Measures | +|---|---| +| Data Collection | How much data is collected and whether it's minimized | +| Data Sharing | Whether data is shared/sold to third parties | +| User Rights | Access, deletion, portability, opt-out rights | +| Retention | How long data is kept and whether limits are defined | +| Security | Encryption, breach notification, security practices | + +**Risk Levels:** Low (8-10), Moderate (5-7.9), High (3-4.9), Critical (1-2.9) + +### API Endpoints + +| Method | Path | Description | +|---|---|---| +| `POST` | `/api/v1/analyze` | Submit HTML content for privacy analysis | +| `GET` | `/api/v1/health` | Health check (backend + Qdrant status) | + +--- + +## Prerequisites + +- **Go 1.22+** (for running the backend directly) +- **Docker + Docker Compose** or **Podman + Podman Compose** (for containerized setup) +- **Anthropic API key** (for LLM analysis) +- **OpenAI API key** (for text embeddings) + +--- + +## Getting API Keys + +### Anthropic API Key (for Claude LLM) + +SmolTerms uses Claude Sonnet 4.5 for analyzing privacy policies and generating structured scores. + +**1. Create an account:** +- Go to [console.anthropic.com](https://console.anthropic.com) and sign up +- Verify your email address + +**2. Add credits:** +- Navigate to **Settings > Billing** in the Console +- Add a payment method and purchase credits +- A minimum $5 purchase gets you to **Tier 1** (50 RPM, 30K input tokens/min) +- $40 cumulative gets you to **Tier 2** (1,000 RPM, 450K input tokens/min), which is more than enough for development + +**3. Generate an API key:** +- Go to [console.anthropic.com/settings/keys](https://console.anthropic.com/settings/keys) +- Click **Create Key**, give it a name (e.g., "smolterms-dev") +- Copy the key immediately -- it starts with `sk-ant-` and won't be shown again + +**4. Set spending limits (recommended):** +- Go to **Settings > Limits** in the Console +- Set a monthly spend limit (e.g., $10-20 for development) +- You can also configure per-workspace limits for tighter control + +**Pricing for SmolTerms usage (Claude Sonnet 4.5):** +- Input: $3 / million tokens +- Output: $15 / million tokens +- A typical privacy policy analysis uses ~5K-15K input tokens and ~1K-2K output tokens +- Estimated cost per analysis: ~$0.02-$0.06 + +**Rate limits by tier:** + +| Tier | Credit Purchase | RPM | Input Tokens/min | +|---|---|---|---| +| Tier 1 | $5 | 50 | 30,000 | +| Tier 2 | $40 | 1,000 | 450,000 | +| Tier 3 | $200 | 2,000 | 800,000 | + +For development and testing, Tier 1 is sufficient. You'll automatically advance tiers as your cumulative credit purchases increase. + +### OpenAI API Key (for Embeddings) + +SmolTerms uses OpenAI's `text-embedding-3-small` model (1536 dimensions) to generate vector embeddings for the RAG pipeline. + +**1. Create an account:** +- Go to [platform.openai.com](https://platform.openai.com) and sign up +- Verify your email and phone number + +**2. Add credits:** +- Navigate to **Settings > Billing** in the dashboard +- Add a payment method and purchase credits +- $5 gets you to **Tier 1** (which is sufficient for development) + +**3. Generate an API key:** +- Go to [platform.openai.com/api-keys](https://platform.openai.com/api-keys) +- Click **Create new secret key**, give it a name (e.g., "smolterms-dev") +- Copy the key immediately -- it starts with `sk-` and won't be shown again + +**4. Set usage limits (recommended):** +- Go to **Settings > Limits** in the dashboard +- Set a monthly budget limit (e.g., $5-10 for development) +- You can set both a soft limit (email alert) and a hard limit (requests blocked) + +**Pricing for SmolTerms usage (text-embedding-3-small):** +- $0.02 / million tokens +- A typical privacy policy (~10K tokens across all chunks) costs ~$0.0002 to embed +- Embeddings are extremely cheap -- the OpenAI costs for SmolTerms are negligible + +**Rate limits by tier:** + +| Tier | Qualification | Usage Limit/month | +|---|---|---| +| Free | Allowed geography | $100/month | +| Tier 1 | $5 paid | $100/month | +| Tier 2 | $50 paid + 7 days | $500/month | + +For development, the free tier or Tier 1 is more than adequate. + +--- + +## Environment Setup + +Copy the example environment file and fill in your API keys: + +```bash +cp .env.example .env +``` + +Edit `.env` with your actual values: + +```bash +# HTTP server port (default: 8080) +PORT=8080 + +# Log level: debug, info, warn, error (default: info) +LOG_LEVEL=info + +# Anthropic API key (required) - get from https://console.anthropic.com/settings/keys +ANTHROPIC_API_KEY=sk-ant-your-key-here + +# OpenAI API key (required) - get from https://platform.openai.com/api-keys +OPENAI_API_KEY=sk-your-key-here + +# Qdrant gRPC address (default: localhost:6334) +# When running with Docker/Podman Compose, this is overridden to qdrant:6334 +QDRANT_URL=localhost:6334 + +# Cache TTL for analysis results (default: 720h = 30 days) +CACHE_DEFAULT_TTL=720h +``` + +> **Important:** Never commit your `.env` file. It is already in `.gitignore`. + +--- + +## Local Development + +### Option A: Docker Compose + +Requires [Docker Engine](https://docs.docker.com/engine/install/) and [Docker Compose](https://docs.docker.com/compose/install/) (v2+). + +**Start the full stack (backend + Qdrant):** + +```bash +docker compose up --build +``` + +This will: +1. Build the Go backend from `backend/Dockerfile` (multi-stage, distroless image) +2. Pull and start the `qdrant/qdrant` container +3. Wait for Qdrant's health check before starting the backend +4. Expose the backend on `http://localhost:8080` +5. Expose Qdrant's HTTP API on `http://localhost:6333` and gRPC on `localhost:6334` + +**Run in the background:** + +```bash +docker compose up --build -d +``` + +**View logs:** + +```bash +docker compose logs -f # all services +docker compose logs -f backend # backend only +docker compose logs -f qdrant # qdrant only +``` + +**Stop everything:** + +```bash +docker compose down # stop containers (data persists in volume) +docker compose down -v # stop containers AND remove Qdrant data volume +``` + +### Option B: Podman Compose + +Requires [Podman](https://podman.io/docs/installation) and [Podman Compose](https://github.com/containers/podman-compose). + +**Start the full stack:** + +```bash +podman compose up --build +``` + +**Run in the background:** + +```bash +podman compose up --build -d +``` + +**View logs:** + +```bash +podman compose logs -f +podman compose logs -f backend +podman compose logs -f qdrant +``` + +**Stop everything:** + +```bash +podman compose down +podman compose down -v # also remove Qdrant data volume +``` + +> **Note:** Podman Compose reads the same `docker-compose.yml` file. If you encounter networking issues with Podman, you may need to ensure the `podman` socket is running (`systemctl --user start podman.socket`) or use `podman-compose` (the Python-based variant) instead of the Go-based `podman compose`. + +### Option C: Run Backend Directly (without containers) + +If you prefer to run Go directly, you'll need Qdrant running separately. + +**1. Start Qdrant (pick one):** + +```bash +# Docker: +docker run -d --name qdrant -p 6333:6333 -p 6334:6334 \ + -v qdrant_data:/qdrant/storage qdrant/qdrant + +# Podman: +podman run -d --name qdrant -p 6333:6333 -p 6334:6334 \ + -v qdrant_data:/qdrant/storage qdrant/qdrant +``` + +**2. Run the backend:** + +```bash +# Make sure .env is configured with QDRANT_URL=localhost:6334 +go run ./backend/cmd/server/main.go +``` + +### Verifying the Setup + +Once everything is running, check the health endpoint: + +```bash +curl http://localhost:8080/api/v1/health +``` + +Expected response: + +```json +{ + "status": "healthy", + "services": { + "qdrant": { "status": "healthy" }, + "anthropic": { "status": "configured" }, + "openai": { "status": "configured" } + } +} +``` + +Try an analysis (requires valid API keys): + +```bash +curl -X POST http://localhost:8080/api/v1/analyze \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://example.com/privacy", + "html": "Privacy Policy

Privacy Policy

We collect your personal information including name, email, and browsing data. We share this data with third-party advertisers. You have no right to delete your data. We retain data indefinitely. We use industry-standard security.

" + }' +``` + +--- + +## Testing + +### Unit Tests + +Run all unit tests (no external services needed): + +```bash +go test ./backend/... +``` + +With coverage: + +```bash +go test ./backend/... -cover +``` + +### Integration Tests + +Integration tests exercise the full pipeline against real services. They require: +- Running Qdrant instance +- Valid `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` in your environment + +```bash +# Start Qdrant first (via Compose or standalone container) + +# Run integration tests with the build tag: +go test -tags=integration ./backend/internal/integration/... -v -timeout 120s +``` + +Integration tests are gated behind the `//go:build integration` build tag, so they won't run during normal `go test ./backend/...`. + +--- + +## Deploying to a Server + +### Build the Docker Image + +```bash +docker build -t smolterms-backend ./backend +``` + +Or with Podman: + +```bash +podman build -t smolterms-backend ./backend +``` + +The Dockerfile uses a multi-stage build: +1. **Build stage:** Compiles a static Go binary with `CGO_ENABLED=0` +2. **Runtime stage:** Uses `gcr.io/distroless/static-debian12` (minimal, no shell, ~2MB base) + +The resulting image is small and contains only the compiled binary. + +### Deploying with Docker Compose on a Server + +**1. Copy project files to your server:** + +```bash +scp -r docker-compose.yml backend/Dockerfile .env.example user@server:/opt/smolterms/ +``` + +**2. On the server, create your `.env` file:** + +```bash +cd /opt/smolterms +cp .env.example .env +# Edit .env with production API keys and settings +``` + +**3. Start the services:** + +```bash +docker compose up --build -d +``` + +**4. (Optional) Set up a reverse proxy:** + +Put Nginx, Caddy, or Traefik in front of port 8080 to handle TLS and domain routing. Example Nginx config: + +```nginx +server { + listen 443 ssl; + server_name smolterms.example.com; + + ssl_certificate /path/to/cert.pem; + ssl_certificate_key /path/to/key.pem; + + location / { + proxy_pass http://127.0.0.1:8080; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 120s; # analysis can take 10-30s + } +} +``` + +### Deploying the Standalone Binary + +If you prefer not to use containers on the server: + +**1. Build the binary locally (cross-compile for Linux):** + +```bash +CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o smolterms-server ./backend/cmd/server/main.go +``` + +For ARM servers (e.g., AWS Graviton): + +```bash +CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o smolterms-server ./backend/cmd/server/main.go +``` + +**2. Copy to server and run:** + +```bash +scp smolterms-server user@server:/opt/smolterms/ +ssh user@server + +# Set environment variables +export ANTHROPIC_API_KEY="sk-ant-..." +export OPENAI_API_KEY="sk-..." +export QDRANT_URL="localhost:6334" + +# Run +/opt/smolterms/smolterms-server +``` + +**3. Run Qdrant on the server:** + +```bash +docker run -d --name qdrant \ + -p 6333:6333 -p 6334:6334 \ + -v /opt/smolterms/qdrant_data:/qdrant/storage \ + --restart unless-stopped \ + qdrant/qdrant +``` + +**4. (Optional) Create a systemd service:** + +```ini +# /etc/systemd/system/smolterms.service +[Unit] +Description=SmolTerms Backend +After=network.target + +[Service] +Type=simple +User=smolterms +WorkingDirectory=/opt/smolterms +ExecStart=/opt/smolterms/smolterms-server +EnvironmentFile=/opt/smolterms/.env +Restart=on-failure +RestartSec=5 + +[Install] +WantedBy=multi-user.target +``` + +```bash +sudo systemctl daemon-reload +sudo systemctl enable --now smolterms +``` + +### Production Checklist + +- [ ] Set `LOG_LEVEL=warn` or `LOG_LEVEL=error` for production +- [ ] Set spending limits on both Anthropic and OpenAI dashboards +- [ ] Put a reverse proxy (Nginx/Caddy) in front for TLS termination +- [ ] Secure Qdrant -- by default it has no authentication; consider binding to localhost only or adding an API key via [Qdrant security config](https://qdrant.tech/documentation/guides/security/) +- [ ] Set up monitoring for the `/api/v1/health` endpoint +- [ ] Configure firewall rules -- only expose ports 80/443 publicly, keep 8080/6333/6334 internal +- [ ] Back up the Qdrant volume periodically (or use [Qdrant snapshots](https://qdrant.tech/documentation/concepts/snapshots/)) +- [ ] Consider rate limiting at the reverse proxy level to prevent abuse + +--- + +## Project Structure + +``` +smolterms/ +├── backend/ +│ ├── cmd/server/main.go # Application entrypoint +│ ├── Dockerfile # Multi-stage Docker build +│ └── internal/ +│ ├── analyzer/ # Full pipeline orchestration, scoring +│ ├── api/ # HTTP handlers, middleware, routing +│ ├── cache/ # Cache interface + in-memory implementation +│ ├── config/ # Environment variable loading +│ ├── embedding/ # EmbeddingClient interface + OpenAI impl +│ ├── extractor/ # HTML parsing, chunking, policy detection +│ ├── integration/ # End-to-end integration tests +│ ├── llm/ # LLMClient interface + Anthropic impl +│ ├── rag/ # RAG pipeline (store + retrieve) +│ ├── types/ # Shared request/response types +│ └── vectorstore/ # VectorStore interface + Qdrant impl +├── extension/ # Browser extension (Firefox + Chrome) +├── docker-compose.yml # Local dev: backend + Qdrant +├── .env.example # Environment variable template +├── go.mod +└── go.sum +``` + +## Tech Stack + +| Component | Technology | +|---|---| +| Backend | Go 1.22+, stdlib `net/http` | +| LLM | Anthropic Claude Sonnet 4.5 | +| Embeddings | OpenAI `text-embedding-3-small` (1536 dims) | +| Vector DB | Qdrant (gRPC) | +| Caching | go-cache (in-memory) | +| Configuration | Environment variables (12-factor) | +| Extension | Vanilla JS, Manifest V3 | + +## License + +TBD diff --git a/backend/README.md b/backend/README.md index c1d5878..7af4f65 100644 --- a/backend/README.md +++ b/backend/README.md @@ -1,5 +1,7 @@ ## Todo -[] - Make the request timeout an env config option -[] - Decide how to prevent multiple requests by same user or different users - how to set up limits and prevent DOS -[] - Testing metrics with some privacy policy datasets to evaluate model and chunking/retrieval strategies \ No newline at end of file +- [ ] Make the request timeout an env config option +- [ ] Decide how to prevent multiple requests by same user or different users - how to set up limits and prevent DOS +- [ ] Testing metrics with some privacy policy datasets to evaluate model and chunking/retrieval strategies +- [ ] For website, have a slider section, right side t&c left side scoring result, to demonstrate the results +- [ ] Add application-side retry logic with backoff for Qdrant connection on startup \ No newline at end of file diff --git a/backend/internal/rag/pipeline.go b/backend/internal/rag/pipeline.go index 1057641..9987930 100644 --- a/backend/internal/rag/pipeline.go +++ b/backend/internal/rag/pipeline.go @@ -56,7 +56,6 @@ func (p *Pipeline) Store(ctx context.Context, url string, contentHash string, ch vsChunks := make([]vectorstore.Chunk, len(chunks)) for i, c := range chunks { vsChunks[i] = vectorstore.Chunk{ - ID: fmt.Sprintf("%s:%d", contentHash, c.Index), Text: c.Text, Index: c.Index, Section: c.Section, diff --git a/backend/internal/rag/pipeline_test.go b/backend/internal/rag/pipeline_test.go index 87950fc..53d65d0 100644 --- a/backend/internal/rag/pipeline_test.go +++ b/backend/internal/rag/pipeline_test.go @@ -3,7 +3,6 @@ package rag import ( "context" "errors" - "fmt" "log/slog" "testing" @@ -103,9 +102,9 @@ func TestStore_ConvertChunksMetadata(t *testing.T) { upserted := store.UpsertCalls[0].Chunks for i, c := range upserted { - wantID := fmt.Sprintf("hash456:%d", i) - if c.ID != wantID { - t.Errorf("chunk[%d].ID = %q, want %q", i, c.ID, wantID) + // ID is left empty so QdrantStore.chunkUUID generates a valid UUID at upsert time + if c.ID != "" { + t.Errorf("chunk[%d].ID = %q, want empty (deferred to vectorstore)", i, c.ID) } if c.URL != "https://example.com/policy" { t.Errorf("chunk[%d].URL = %q, want %q", i, c.URL, "https://example.com/policy") diff --git a/docker-compose.yml b/docker-compose.yml index f321d17..71f2969 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,6 +3,9 @@ services: build: context: . dockerfile: backend/Dockerfile + security_opt: + - seccomp=unconfined + - label=disable ports: - "8080:8080" env_file: @@ -10,22 +13,19 @@ services: environment: - QDRANT_URL=qdrant:6334 depends_on: - qdrant: - condition: service_healthy + - qdrant qdrant: - image: qdrant/qdrant:latest + image: docker.io/qdrant/qdrant:latest + security_opt: + - seccomp=unconfined + - label=disable ports: - "6333:6333" - "6334:6334" volumes: - qdrant_data:/qdrant/storage restart: unless-stopped - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:6333/healthz"] - interval: 10s - timeout: 5s - retries: 3 volumes: qdrant_data: