From 217a3e9a3cec976b064fb77d0d176c53a1dacfbd Mon Sep 17 00:00:00 2001 From: Parth576 Date: Sat, 28 Feb 2026 22:23:19 -0500 Subject: [PATCH 1/5] feat(website): add static landing page for SmolTerms Build a single-page static site at website/ with Tailwind CSS (CDN), explaining the project, its scoring system, and linking to browser extension install pages. Includes placeholder section for URL analysis feature (task-03). Responsive design with pastel offwhite color palette. Assisted by the code-assist SOP --- .../project-landing-page/context.md | 35 ++ .../project-landing-page/plan.md | 33 ++ .../project-landing-page/progress.md | 21 ++ website/README.md | 41 +++ website/index.html | 309 ++++++++++++++++++ website/script.js | 28 ++ 6 files changed, 467 insertions(+) create mode 100644 .agents/scratchpad/2026-02-15-smolterms/project-landing-page/context.md create mode 100644 .agents/scratchpad/2026-02-15-smolterms/project-landing-page/plan.md create mode 100644 .agents/scratchpad/2026-02-15-smolterms/project-landing-page/progress.md create mode 100644 website/README.md create mode 100644 website/index.html create mode 100644 website/script.js diff --git a/.agents/scratchpad/2026-02-15-smolterms/project-landing-page/context.md b/.agents/scratchpad/2026-02-15-smolterms/project-landing-page/context.md new file mode 100644 index 0000000..e99a923 --- /dev/null +++ b/.agents/scratchpad/2026-02-15-smolterms/project-landing-page/context.md @@ -0,0 +1,35 @@ +# Context: SmolTerms Project Landing Page + +## Requirements +- Static landing page at `website/` directory +- Vanilla HTML + Tailwind CSS (CDN) + minimal JS +- No build system required +- Light pastel/offwhite color palette with darker contrast buttons +- Responsive (mobile 375px, desktop 1280px) + +## Sections Required +1. Hero/Header - project name, tagline +2. What it does - privacy policy analyzer, 5 dimensions +3. Motivation - why privacy policies need simplification +4. How it works - extension flow explanation +5. 
Install - Firefox/Chrome buttons (placeholder URLs) +6. Scoring explanation - 5 dimensions + 4 risk levels with colors +7. Try It (placeholder) - `
` for URL analysis (task-03) +8. Footer - GitHub repo, license + +## Design Decisions +- **Tailwind CSS via CDN** - user preference, no build step needed +- **Color palette**: Offwhite backgrounds (#f8f7f4 / #faf9f6), darker offwhite buttons (#e8e5df / #d4d0c8) +- **Risk level colors from design doc**: Green (8-10), Yellow (5-7.9), Orange (3-4.9), Red (1-2.9) +- **Font**: Inter via Google Fonts for clean modern look + +## Scoring System (from design doc) +| Dimension | What it measures | +|-----------|-----------------| +| Data Collection | How much personal data is collected | +| Data Sharing | Whether data is shared/sold to third parties | +| User Rights | Ability to access, delete, export data | +| Retention | How long data is kept | +| Security | Security measures, encryption, breach notification | + +Risk levels: Low (8-10, green), Moderate (5-7.9, yellow), High (3-4.9, orange), Critical (1-2.9, red) diff --git a/.agents/scratchpad/2026-02-15-smolterms/project-landing-page/plan.md b/.agents/scratchpad/2026-02-15-smolterms/project-landing-page/plan.md new file mode 100644 index 0000000..90e7a00 --- /dev/null +++ b/.agents/scratchpad/2026-02-15-smolterms/project-landing-page/plan.md @@ -0,0 +1,33 @@ +# Plan: SmolTerms Project Landing Page + +## Test Strategy +Since this is a static HTML/CSS/JS page with no build system, traditional unit tests don't apply. Validation: +- Manual browser check: all sections render correctly +- HTML structure verification: `
` placeholder exists +- Responsive: verify layout at 375px and 1280px viewports +- All required sections present in HTML + +## Implementation Plan + +### Files to Create +1. `website/index.html` - Main page with all sections, Tailwind CDN +2. `website/script.js` - Smooth scrolling, mobile nav toggle +3. `website/README.md` - Local dev and GitHub Pages deployment instructions + +### Design Approach +- Tailwind CSS via CDN (no build step) +- Google Fonts: Inter +- Color palette: offwhite backgrounds, darker offwhite/warm gray buttons +- Risk level colors used as accents in scoring section +- Clean, minimal layout with generous whitespace + +### Section Breakdown +1. **Nav** - Fixed top, logo + nav links +2. **Hero** - Large heading, tagline, CTA buttons +3. **What it does** - Brief feature overview +4. **Motivation** - Why SmolTerms exists +5. **How it works** - 3-step visual flow +6. **Install** - Browser extension download buttons +7. **Scoring** - 5 dimensions grid + risk level legend +8. **Try It** - Placeholder section for URL analysis +9. 
**Footer** - Links, license diff --git a/.agents/scratchpad/2026-02-15-smolterms/project-landing-page/progress.md b/.agents/scratchpad/2026-02-15-smolterms/project-landing-page/progress.md new file mode 100644 index 0000000..4464106 --- /dev/null +++ b/.agents/scratchpad/2026-02-15-smolterms/project-landing-page/progress.md @@ -0,0 +1,21 @@ +# Progress: SmolTerms Project Landing Page + +## Setup +- [x] Create documentation directory +- [x] Read task file and design document +- [x] Create context.md + +## Implementation +- [x] Create `website/` directory structure +- [x] Build `index.html` with all sections (hero, features, motivation, how-it-works, install, scoring, try-it, footer) +- [x] Style with Tailwind CSS (CDN) - pastel/offwhite palette with custom cream color scale +- [x] Add `script.js` for smooth scrolling and mobile nav toggle +- [x] Create `website/README.md` with local dev and GitHub Pages deploy instructions +- [x] Validate: all acceptance criteria met (sections, scoring, responsive classes, try-it placeholder) +- [ ] Commit + +## Decisions +- Used Tailwind CDN with custom config (cream color palette, risk level colors) +- Inter font via Google Fonts for clean modern look +- SVG icons inline (no external icon library dependency) +- Firefox/Chrome install buttons use placeholder `#` URLs diff --git a/website/README.md b/website/README.md new file mode 100644 index 0000000..f969b05 --- /dev/null +++ b/website/README.md @@ -0,0 +1,41 @@ +# SmolTerms Website + +Static landing page for the SmolTerms project. + +## Local Development + +No build step required. Open `index.html` directly in your browser: + +```bash +# From the project root +open website/index.html + +# Or use a local server (optional, for consistent behavior) +cd website && python3 -m http.server 3000 +``` + +Then visit `http://localhost:3000`. + +## Deployment (GitHub Pages) + +1. In the GitHub repository, go to **Settings > Pages** +2. Set the source to **Deploy from a branch** +3. 
Select the branch and set the folder to `/website` (note: branch deployments only offer `/ (root)` or `/docs` as the folder; if `/website` is not selectable, move the site to `/docs` or publish it with a GitHub Actions workflow) +4. Save — the site will be live at `https://<username>.github.io/smolterms/` + +No build step or CI configuration is needed. GitHub Pages serves the static files directly. + +## Structure + +``` +website/ +├── index.html # Main page (all sections) +├── script.js # Smooth scrolling, mobile nav +└── README.md # This file +``` + +## Tech + +- **Tailwind CSS** via CDN (no build) +- **Inter** font via Google Fonts +- Vanilla JavaScript (no framework) diff --git a/website/index.html b/website/index.html new file mode 100644 index 0000000..b4e347f --- /dev/null +++ b/website/index.html @@ -0,0 +1,309 @@ + + + + + + SmolTerms - Understand Privacy Policies in Seconds + + + + + + + + + + + + + +
+
+

+ Understand Privacy Policies
in Seconds +

+

+ SmolTerms is a browser extension that reads the fine print so you don't have to. Get clear privacy scores across 5 dimensions before you click "I agree." +

+ +
+
+ + +
+
+

What SmolTerms Does

+

+ One click to analyze any privacy policy. SmolTerms breaks down complex legal language into clear, actionable scores. +

+
+
+
+ + + +
+

Privacy Scoring

+

+ Get scores across 5 privacy dimensions — data collection, sharing, user rights, retention, and security — rated 1 to 10. +

+
+
+
+ + + +
+

AI-Powered Analysis

+

+ Uses a RAG pipeline and LLMs to read and understand privacy policies the way a privacy expert would. +

+
+
+
+ + + +
+

Key Concerns

+

+ Highlights the most important privacy concerns so you know exactly what to watch out for. +

+
+
+
+
+ + +
+
+

Why SmolTerms Exists

+

+ The average privacy policy is over 4,000 words long. It would take roughly 18 minutes to read — and most people encounter dozens of them every year. Nobody reads them, but everyone agrees to them. +

+

+ SmolTerms exists because you deserve to know what you're agreeing to without spending hours reading legal jargon. One click, clear scores, and the key concerns — that's it. +

+
+
+ + +
+
+

How It Works

+
+
+
+ 1 +
+

Visit a Page

+

+ Navigate to any website's privacy policy or terms of service page. +

+
+
+
+ 2 +
+

Click the Extension

+

+ Click the SmolTerms icon in your browser toolbar. The extension extracts the page content and sends it for analysis. +

+
+
+
+ 3 +
+

Get Your Scores

+

+ Within seconds, see privacy scores across 5 dimensions, key concerns, and an overall risk level. +

+
+
+
+
+ + +
+
+

Get SmolTerms

+

Available for Firefox and Chrome. Free and open source.

+ +

Extension store links will be available once published.

+
+
+ + +
+
+

Privacy Scoring System

+

+ Every policy is scored across 5 dimensions on a scale of 1 to 10, where higher is better for your privacy. +

+ + +
+
+

Data Collection

+

How much personal data is collected, what types, and whether collection is proportionate to the service.

+
+
+

Data Sharing

+

Whether data is shared or sold to third parties and under what conditions.

+
+
+

User Rights

+

Your ability to access, delete, and export your data, plus opt-out mechanisms.

+
+
+

Retention

+

How long your data is kept and whether retention periods are clearly defined.

+
+
+

Security

+

Security measures mentioned including encryption, breach notification policies, and data protection practices.

+
+
+ + +

Risk Levels

+
+
+
+
+ Low Risk + 8.0 – 10.0 +
+
+
+
+
+ Moderate + 5.0 – 7.9 +
+
+
+
+
+ High Risk + 3.0 – 4.9 +
+
+
+
+
+ Critical + 1.0 – 2.9 +
+
+
+
+
+ + +
+
+

Try It Out

+

+ Paste a URL to analyze its privacy policy directly from this page. +

+
+

URL analysis feature coming soon.

+
+
+
+ + +
+
+
+ SmolTerms + + GitHub + +
+

+ Open source under the MIT License. +

+
+
+ + + + diff --git a/website/script.js b/website/script.js new file mode 100644 index 0000000..6228274 --- /dev/null +++ b/website/script.js @@ -0,0 +1,28 @@ +// Mobile menu toggle +const menuBtn = document.getElementById('mobile-menu-btn'); +const mobileMenu = document.getElementById('mobile-menu'); + +menuBtn.addEventListener('click', () => { + mobileMenu.classList.toggle('hidden'); +}); + +// Close mobile menu when a link is clicked +mobileMenu.querySelectorAll('a').forEach(link => { + link.addEventListener('click', () => { + mobileMenu.classList.add('hidden'); + }); +}); + +// Smooth scrolling for anchor links +document.querySelectorAll('a[href^="#"]').forEach(anchor => { + anchor.addEventListener('click', (e) => { + const targetId = anchor.getAttribute('href'); + if (targetId === '#') return; + + const target = document.querySelector(targetId); + if (target) { + e.preventDefault(); + target.scrollIntoView({ behavior: 'smooth', block: 'start' }); + } + }); +}); From 3b297c6ac7782eee3fea59d72a5be31dc7160ae0 Mon Sep 17 00:00:00 2001 From: Parth576 Date: Sat, 28 Feb 2026 23:51:16 -0500 Subject: [PATCH 2/5] feat(api): add POST /api/v1/analyze-url endpoint with SSRF protection and rate limiting Add server-side URL fetching endpoint that accepts a URL, fetches HTML content, and runs it through the existing analysis pipeline. 
Includes: - SSRF validator: blocks private IPs, loopback, link-local, non-HTTP schemes, with DNS resolution to prevent rebinding attacks - Per-IP rate limiter: sliding window (5 req/min), returns 429 with Retry-After - URL fetcher: configurable body size limit (5MB), redirect following with SSRF validation on each hop, content-type checking, descriptive error messages suggesting browser extension fallback - Configurable CORS origins via CORS_ALLOWED_ORIGINS env var (default: *) - Request type in types/request.go per coding standards Assisted by the code-assist SOP --- backend/internal/api/fetcher.go | 89 ++++++++++ backend/internal/api/fetcher_test.go | 172 ++++++++++++++++++++ backend/internal/api/handler.go | 68 ++++++++ backend/internal/api/handler_test.go | 164 +++++++++++++++++++ backend/internal/api/middleware.go | 52 ++++-- backend/internal/api/middleware_test.go | 36 ++++- backend/internal/api/ratelimiter.go | 89 ++++++++++ backend/internal/api/ratelimiter_test.go | 189 ++++++++++++++++++++++ backend/internal/api/router.go | 20 ++- backend/internal/api/urlvalidator.go | 77 +++++++++ backend/internal/api/urlvalidator_test.go | 119 ++++++++++++++ backend/internal/config/config.go | 16 +- backend/internal/types/request.go | 6 + 13 files changed, 1073 insertions(+), 24 deletions(-) create mode 100644 backend/internal/api/fetcher.go create mode 100644 backend/internal/api/fetcher_test.go create mode 100644 backend/internal/api/ratelimiter.go create mode 100644 backend/internal/api/ratelimiter_test.go create mode 100644 backend/internal/api/urlvalidator.go create mode 100644 backend/internal/api/urlvalidator_test.go create mode 100644 backend/internal/types/request.go diff --git a/backend/internal/api/fetcher.go b/backend/internal/api/fetcher.go new file mode 100644 index 0000000..4a783c8 --- /dev/null +++ b/backend/internal/api/fetcher.go @@ -0,0 +1,89 @@ +package api + +import ( + "context" + "fmt" + "io" + "net/http" + "strings" +) + +const ( + 
defaultMaxBodySize = 5 * 1024 * 1024 // 5MB + defaultUserAgent = "SmolTerms/1.0 (+https://github.com/Parth576/smolterms)" + defaultMaxRedirects = 5 +) + +// Fetcher handles server-side HTTP fetching with SSRF protection and safety limits. +type Fetcher struct { + MaxBodySize int64 + UserAgent string + MaxRedirects int + Client *http.Client + // ValidateFunc validates a URL before fetching. Defaults to ValidateURL. + ValidateFunc func(string) error +} + +// NewFetcher creates a Fetcher with default settings. +func NewFetcher() *Fetcher { + f := &Fetcher{ + MaxBodySize: defaultMaxBodySize, + UserAgent: defaultUserAgent, + MaxRedirects: defaultMaxRedirects, + ValidateFunc: ValidateURL, + } + f.Client = &http.Client{ + CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= f.MaxRedirects { + return fmt.Errorf("too many redirects (max %d)", f.MaxRedirects) + } + if err := f.ValidateFunc(req.URL.String()); err != nil { + return fmt.Errorf("redirect blocked: %w", err) + } + return nil + }, + } + return f +} + +// Fetch downloads the page at rawURL and returns the HTML body. +// It validates the URL for SSRF, checks content type, and limits body size. +func (f *Fetcher) Fetch(ctx context.Context, rawURL string) (string, error) { + if err := f.ValidateFunc(rawURL); err != nil { + return "", fmt.Errorf("url validation failed: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return "", fmt.Errorf("failed to create request: %w", err) + } + req.Header.Set("User-Agent", f.UserAgent) + req.Header.Set("Accept", "text/html") + + resp, err := f.Client.Do(req) + if err != nil { + return "", fmt.Errorf("failed to fetch url: %w. Try using the browser extension instead", err) + } + defer resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return "", fmt.Errorf("server returned status %d. 
The page may be restricted — try using the browser extension instead", resp.StatusCode) + } + + ct := resp.Header.Get("Content-Type") + if !strings.Contains(strings.ToLower(ct), "text/html") { + return "", fmt.Errorf("unexpected content type %q: expected text/html", ct) + } + + // Read up to MaxBodySize + 1 byte to detect oversized responses. + limited := io.LimitReader(resp.Body, f.MaxBodySize+1) + body, err := io.ReadAll(limited) + if err != nil { + return "", fmt.Errorf("failed to read response body: %w", err) + } + if int64(len(body)) > f.MaxBodySize { + return "", fmt.Errorf("response body too large (exceeds %d bytes)", f.MaxBodySize) + } + + return string(body), nil +} diff --git a/backend/internal/api/fetcher_test.go b/backend/internal/api/fetcher_test.go new file mode 100644 index 0000000..6ce4dac --- /dev/null +++ b/backend/internal/api/fetcher_test.go @@ -0,0 +1,172 @@ +package api + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" +) + +// noopValidator skips SSRF validation for httptest servers (which use 127.0.0.1). +func noopValidator(_ string) error { return nil } + +// newTestFetcher creates a Fetcher with SSRF validation disabled for local test servers. 
+func newTestFetcher() *Fetcher { + f := NewFetcher() + f.ValidateFunc = noopValidator + return f +} + +func TestFetcher(t *testing.T) { + t.Run("fetches HTML content successfully", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + fmt.Fprint(w, "Privacy Policy") + })) + defer ts.Close() + + f := newTestFetcher() + html, err := f.Fetch(context.Background(), ts.URL) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(html, "Privacy Policy") { + t.Errorf("html = %q, want it to contain 'Privacy Policy'", html) + } + }) + + t.Run("sets correct User-Agent header", func(t *testing.T) { + var gotUA string + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotUA = r.Header.Get("User-Agent") + w.Header().Set("Content-Type", "text/html") + fmt.Fprint(w, "") + })) + defer ts.Close() + + f := newTestFetcher() + f.Fetch(context.Background(), ts.URL) + + if !strings.Contains(gotUA, "SmolTerms") { + t.Errorf("User-Agent = %q, want it to contain 'SmolTerms'", gotUA) + } + }) + + t.Run("rejects non-HTML content type", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + fmt.Fprint(w, `{"key":"value"}`) + })) + defer ts.Close() + + f := newTestFetcher() + _, err := f.Fetch(context.Background(), ts.URL) + if err == nil { + t.Error("expected error for non-HTML content type") + } + if !strings.Contains(err.Error(), "content type") { + t.Errorf("error = %q, want it to mention 'content type'", err) + } + }) + + t.Run("rejects oversized response body", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + // Write more than 1KB. 
+ for i := 0; i < 2; i++ { + fmt.Fprint(w, strings.Repeat("x", 1024)) + } + })) + defer ts.Close() + + f := newTestFetcher() + f.MaxBodySize = 1024 // 1KB for testing + _, err := f.Fetch(context.Background(), ts.URL) + if err == nil { + t.Error("expected error for oversized body") + } + if !strings.Contains(err.Error(), "too large") { + t.Errorf("error = %q, want it to mention 'too large'", err) + } + }) + + t.Run("SSRF-blocked URLs return error", func(t *testing.T) { + f := NewFetcher() // Use real validator + _, err := f.Fetch(context.Background(), "http://127.0.0.1:8080/secret") + if err == nil { + t.Error("expected error for SSRF-blocked URL") + } + }) + + t.Run("respects context cancellation", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(5 * time.Second) + w.Header().Set("Content-Type", "text/html") + fmt.Fprint(w, "") + })) + defer ts.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) + defer cancel() + + f := newTestFetcher() + _, err := f.Fetch(ctx, ts.URL) + if err == nil { + t.Error("expected error from context timeout") + } + }) + + t.Run("follows redirects", func(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/redirect", func(w http.ResponseWriter, r *http.Request) { + http.Redirect(w, r, "/final", http.StatusFound) + }) + mux.HandleFunc("/final", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + fmt.Fprint(w, "Final page") + }) + ts := httptest.NewServer(mux) + defer ts.Close() + + f := newTestFetcher() + html, err := f.Fetch(context.Background(), ts.URL+"/redirect") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(html, "Final page") { + t.Errorf("html = %q, want it to contain 'Final page'", html) + } + }) + + t.Run("unreachable URL returns descriptive error", func(t *testing.T) { + // Use a closed server to simulate unreachable URL without 
network timeout. + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {})) + url := ts.URL + ts.Close() + + f := newTestFetcher() + _, err := f.Fetch(context.Background(), url) + if err == nil { + t.Error("expected error for unreachable URL") + } + }) + + t.Run("returns error with extension suggestion on failure", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusForbidden) + })) + defer ts.Close() + + f := newTestFetcher() + _, err := f.Fetch(context.Background(), ts.URL) + if err == nil { + t.Error("expected error for 403 response") + } + if !strings.Contains(err.Error(), "extension") { + t.Errorf("error = %q, want it to suggest using the browser extension", err) + } + }) +} diff --git a/backend/internal/api/handler.go b/backend/internal/api/handler.go index 16ef51d..9aecf4a 100644 --- a/backend/internal/api/handler.go +++ b/backend/internal/api/handler.go @@ -105,6 +105,74 @@ func NewHealthHandler(cfg *config.Config, qdrantCheck func(ctx context.Context) } } +// NewAnalyzeURLHandler returns a handler that accepts a URL, fetches its content +// server-side, and runs the analysis pipeline on the fetched HTML. +// If pipeline is nil, the handler returns 503 Service Unavailable. +// If logger is nil, the default slog logger is used. 
+func NewAnalyzeURLHandler(pipeline PipelineRunner, fetcher *Fetcher, logger *slog.Logger) http.HandlerFunc { + if logger == nil { + logger = slog.Default() + } + return func(w http.ResponseWriter, r *http.Request) { + if pipeline == nil { + WriteError(w, http.StatusServiceUnavailable, "Analysis service not configured") + return + } + + var req types.AnalyzeURLRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + WriteError(w, http.StatusBadRequest, "Invalid JSON in request body") + return + } + + if req.URL == "" { + WriteError(w, http.StatusBadRequest, "The url field is required and must be non-empty") + return + } + + requestID := RequestIDFromContext(r.Context()) + + html, err := fetcher.Fetch(r.Context(), req.URL) + if err != nil { + logger.Error("failed to fetch url", + "url", req.URL, + "request_id", requestID, + "error", err, + ) + WriteError(w, http.StatusBadGateway, "Failed to fetch the page. Try using the browser extension instead.") + return + } + + analysisReq := analyzer.AnalysisRequest{ + URL: req.URL, + HTML: html, + } + + result, err := pipeline.Analyze(r.Context(), analysisReq) + if err != nil { + if errors.Is(err, context.DeadlineExceeded) { + logger.Error("analysis timed out", + "url", req.URL, + "request_id", requestID, + "error", err, + ) + WriteError(w, http.StatusGatewayTimeout, "Analysis timed out. Please try again.") + return + } + + logger.Error("analysis failed", + "url", req.URL, + "request_id", requestID, + "error", err, + ) + WriteError(w, http.StatusBadGateway, "Analysis service temporarily unavailable. 
Please try again.") + return + } + + WriteJSON(w, http.StatusOK, result) + } +} + func apiKeyStatus(key string) string { if key != "" { return "configured" diff --git a/backend/internal/api/handler_test.go b/backend/internal/api/handler_test.go index c1a996c..802f37c 100644 --- a/backend/internal/api/handler_test.go +++ b/backend/internal/api/handler_test.go @@ -411,3 +411,167 @@ func TestHealthHandler(t *testing.T) { } }) } + +func TestAnalyzeURLHandler(t *testing.T) { + successResult := &analyzer.AnalysisResult{ + URL: "https://example.com/privacy", + OverallScore: 7.5, + RiskLevel: analyzer.RiskModerate, + Dimensions: map[string]analyzer.DimScore{ + analyzer.DimDataCollection: {Score: 8.0, Summary: "Good data collection practices"}, + analyzer.DimDataSharing: {Score: 7.0, Summary: "Moderate data sharing"}, + analyzer.DimUserRights: {Score: 7.5, Summary: "Adequate user rights"}, + analyzer.DimRetention: {Score: 7.0, Summary: "Reasonable retention"}, + analyzer.DimSecurity: {Score: 8.0, Summary: "Strong security measures"}, + }, + KeyConcerns: []string{"Third-party analytics"}, + Summary: "Overall moderate privacy practices", + Cached: false, + AnalyzedAt: time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC), + } + + // Create a test server that serves HTML content. + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + fmt.Fprint(w, "Privacy Policy content") + })) + defer ts.Close() + + // Create a fetcher that skips SSRF validation for the test server. 
+ testFetcher := NewFetcher() + testFetcher.ValidateFunc = func(_ string) error { return nil } + + t.Run("valid URL returns 200 with analysis result", func(t *testing.T) { + mock := &mockPipeline{result: successResult} + handler := NewAnalyzeURLHandler(mock, testFetcher, nil) + + body := fmt.Sprintf(`{"url":%q}`, ts.URL) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/api/v1/analyze-url", strings.NewReader(body)) + + handler(w, r) + + if got := w.Code; got != http.StatusOK { + t.Errorf("status code = %d, want %d", got, http.StatusOK) + } + + var got analyzer.AnalysisResult + if err := json.NewDecoder(w.Body).Decode(&got); err != nil { + t.Fatalf("failed to decode response body: %v", err) + } + if got.OverallScore != successResult.OverallScore { + t.Errorf("overall_score = %v, want %v", got.OverallScore, successResult.OverallScore) + } + }) + + t.Run("missing URL returns 400", func(t *testing.T) { + mock := &mockPipeline{result: successResult} + handler := NewAnalyzeURLHandler(mock, testFetcher, nil) + + body := `{"url":""}` + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/api/v1/analyze-url", strings.NewReader(body)) + + handler(w, r) + + if got := w.Code; got != http.StatusBadRequest { + t.Errorf("status code = %d, want %d", got, http.StatusBadRequest) + } + + var got map[string]string + if err := json.NewDecoder(w.Body).Decode(&got); err != nil { + t.Fatalf("failed to decode response body: %v", err) + } + if !strings.Contains(strings.ToLower(got["error"]), "url") { + t.Errorf("error message = %q, want it to mention 'url'", got["error"]) + } + }) + + t.Run("malformed JSON returns 400", func(t *testing.T) { + mock := &mockPipeline{result: successResult} + handler := NewAnalyzeURLHandler(mock, testFetcher, nil) + + body := `{not valid json}` + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/api/v1/analyze-url", strings.NewReader(body)) + + handler(w, r) + + if got := w.Code; got != 
http.StatusBadRequest { + t.Errorf("status code = %d, want %d", got, http.StatusBadRequest) + } + }) + + t.Run("fetch failure returns error with extension suggestion", func(t *testing.T) { + // Create a server that returns 403. + forbidden := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusForbidden) + })) + defer forbidden.Close() + + mock := &mockPipeline{result: successResult} + handler := NewAnalyzeURLHandler(mock, testFetcher, nil) + + body := fmt.Sprintf(`{"url":%q}`, forbidden.URL) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/api/v1/analyze-url", strings.NewReader(body)) + + handler(w, r) + + if got := w.Code; got != http.StatusBadGateway { + t.Errorf("status code = %d, want %d", got, http.StatusBadGateway) + } + + var got map[string]string + if err := json.NewDecoder(w.Body).Decode(&got); err != nil { + t.Fatalf("failed to decode response body: %v", err) + } + if !strings.Contains(strings.ToLower(got["error"]), "extension") { + t.Errorf("error message = %q, want it to suggest using the browser extension", got["error"]) + } + }) + + t.Run("pipeline error returns 502", func(t *testing.T) { + mock := &mockPipeline{err: errors.New("llm service unavailable")} + handler := NewAnalyzeURLHandler(mock, testFetcher, nil) + + body := fmt.Sprintf(`{"url":%q}`, ts.URL) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/api/v1/analyze-url", strings.NewReader(body)) + + handler(w, r) + + if got := w.Code; got != http.StatusBadGateway { + t.Errorf("status code = %d, want %d", got, http.StatusBadGateway) + } + }) + + t.Run("pipeline timeout returns 504", func(t *testing.T) { + mock := &mockPipeline{err: context.DeadlineExceeded} + handler := NewAnalyzeURLHandler(mock, testFetcher, nil) + + body := fmt.Sprintf(`{"url":%q}`, ts.URL) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/api/v1/analyze-url", strings.NewReader(body)) + + 
handler(w, r) + + if got := w.Code; got != http.StatusGatewayTimeout { + t.Errorf("status code = %d, want %d", got, http.StatusGatewayTimeout) + } + }) + + t.Run("nil pipeline returns 503", func(t *testing.T) { + handler := NewAnalyzeURLHandler(nil, testFetcher, nil) + + body := fmt.Sprintf(`{"url":%q}`, ts.URL) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/api/v1/analyze-url", strings.NewReader(body)) + + handler(w, r) + + if got := w.Code; got != http.StatusServiceUnavailable { + t.Errorf("status code = %d, want %d", got, http.StatusServiceUnavailable) + } + }) +} diff --git a/backend/internal/api/middleware.go b/backend/internal/api/middleware.go index 80f3f6e..fef2bff 100644 --- a/backend/internal/api/middleware.go +++ b/backend/internal/api/middleware.go @@ -4,6 +4,7 @@ import ( "context" "log/slog" "net/http" + "strings" "time" "github.com/google/uuid" @@ -39,19 +40,50 @@ func (sr *statusRecorder) Write(b []byte) (int, error) { } // CORSMiddleware adds CORS headers to all responses and handles OPTIONS preflight requests. -func CORSMiddleware(next http.Handler) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Access-Control-Allow-Origin", "*") - w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS, PUT, DELETE") - w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization") +// allowedOrigins is a comma-separated list of allowed origins, or "*" for all. 
+func CORSMiddleware(allowedOrigins string) func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + origin := r.Header.Get("Origin") + if allowedOrigins == "*" || allowedOrigins == "" { + w.Header().Set("Access-Control-Allow-Origin", "*") + } else if origin != "" && originAllowed(origin, allowedOrigins) { + w.Header().Set("Access-Control-Allow-Origin", origin) + w.Header().Set("Vary", "Origin") + } + w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS, PUT, DELETE") + w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization") - if r.Method == http.MethodOptions { - w.WriteHeader(http.StatusNoContent) - return + if r.Method == http.MethodOptions { + w.WriteHeader(http.StatusNoContent) + return + } + + next.ServeHTTP(w, r) + }) + } +} + +// originAllowed checks if origin is in the comma-separated allowedOrigins list. +func originAllowed(origin, allowedOrigins string) bool { + for _, allowed := range splitOrigins(allowedOrigins) { + if allowed == origin { + return true } + } + return false +} - next.ServeHTTP(w, r) - }) +// splitOrigins splits a comma-separated origins string and trims whitespace. +func splitOrigins(origins string) []string { + var result []string + for _, o := range strings.Split(origins, ",") { + trimmed := strings.TrimSpace(o) + if trimmed != "" { + result = append(result, trimmed) + } + } + return result } // LoggingMiddleware returns middleware that logs request method, path, status code, and duration. 
diff --git a/backend/internal/api/middleware_test.go b/backend/internal/api/middleware_test.go index e081336..f11bc94 100644 --- a/backend/internal/api/middleware_test.go +++ b/backend/internal/api/middleware_test.go @@ -17,8 +17,8 @@ func TestCORSMiddleware(t *testing.T) { w.WriteHeader(http.StatusOK) }) - t.Run("sets CORS headers on regular request", func(t *testing.T) { - handler := CORSMiddleware(dummy) + t.Run("sets wildcard origin when configured with *", func(t *testing.T) { + handler := CORSMiddleware("*")(dummy) w := httptest.NewRecorder() r := httptest.NewRequest(http.MethodGet, "/", nil) @@ -36,7 +36,7 @@ func TestCORSMiddleware(t *testing.T) { }) t.Run("handles OPTIONS with 204 and no body", func(t *testing.T) { - handler := CORSMiddleware(dummy) + handler := CORSMiddleware("*")(dummy) w := httptest.NewRecorder() r := httptest.NewRequest(http.MethodOptions, "/", nil) @@ -59,7 +59,7 @@ func TestCORSMiddleware(t *testing.T) { called = true w.WriteHeader(http.StatusOK) }) - handler := CORSMiddleware(next) + handler := CORSMiddleware("*")(next) w := httptest.NewRecorder() r := httptest.NewRequest(http.MethodGet, "/", nil) @@ -75,7 +75,7 @@ func TestCORSMiddleware(t *testing.T) { next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { called = true }) - handler := CORSMiddleware(next) + handler := CORSMiddleware("*")(next) w := httptest.NewRecorder() r := httptest.NewRequest(http.MethodOptions, "/", nil) @@ -85,6 +85,32 @@ func TestCORSMiddleware(t *testing.T) { t.Error("next handler should not be called for OPTIONS") } }) + + t.Run("sets matching origin when configured with specific origins", func(t *testing.T) { + handler := CORSMiddleware("https://example.com, https://other.com")(dummy) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodGet, "/", nil) + r.Header.Set("Origin", "https://example.com") + + handler.ServeHTTP(w, r) + + if got := w.Header().Get("Access-Control-Allow-Origin"); got != "https://example.com" { + 
t.Errorf("Access-Control-Allow-Origin = %q, want %q", got, "https://example.com") + } + }) + + t.Run("does not set origin for non-matching request", func(t *testing.T) { + handler := CORSMiddleware("https://example.com")(dummy) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodGet, "/", nil) + r.Header.Set("Origin", "https://evil.com") + + handler.ServeHTTP(w, r) + + if got := w.Header().Get("Access-Control-Allow-Origin"); got != "" { + t.Errorf("Access-Control-Allow-Origin = %q, want empty for non-matching origin", got) + } + }) } func TestLoggingMiddleware(t *testing.T) { diff --git a/backend/internal/api/ratelimiter.go b/backend/internal/api/ratelimiter.go new file mode 100644 index 0000000..89690d5 --- /dev/null +++ b/backend/internal/api/ratelimiter.go @@ -0,0 +1,89 @@ +package api + +import ( + "fmt" + "net" + "net/http" + "sync" + "time" +) + +// RateLimiter implements a sliding-window per-key rate limiter. +type RateLimiter struct { + mu sync.Mutex + limit int + window time.Duration + clients map[string][]time.Time +} + +// NewRateLimiter creates a rate limiter that allows limit requests per window per key. +func NewRateLimiter(limit int, window time.Duration) *RateLimiter { + return &RateLimiter{ + limit: limit, + window: window, + clients: make(map[string][]time.Time), + } +} + +// Allow returns true if the key has not exceeded the rate limit. +func (rl *RateLimiter) Allow(key string) bool { + rl.mu.Lock() + defer rl.mu.Unlock() + + now := time.Now() + cutoff := now.Add(-rl.window) + + // Prune expired entries. + timestamps := rl.clients[key] + valid := timestamps[:0] + for _, t := range timestamps { + if t.After(cutoff) { + valid = append(valid, t) + } + } + + if len(valid) >= rl.limit { + rl.clients[key] = valid + return false + } + + rl.clients[key] = append(valid, now) + return true +} + +// RateLimitMiddleware returns middleware that rate-limits requests by client IP. 
+// The client IP is extracted from X-Forwarded-For (first value) or RemoteAddr. +func RateLimitMiddleware(rl *RateLimiter) func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + ip := clientIP(r) + if !rl.Allow(ip) { + w.Header().Set("Retry-After", fmt.Sprintf("%d", int(rl.window.Seconds()))) + WriteError(w, http.StatusTooManyRequests, "Rate limit exceeded. Please try again later.") + return + } + next.ServeHTTP(w, r) + }) + } +} + +// clientIP extracts the client IP from X-Forwarded-For or RemoteAddr. +func clientIP(r *http.Request) string { + if xff := r.Header.Get("X-Forwarded-For"); xff != "" { + // Take the first IP in the chain (client IP). + if i := len(xff); i > 0 { + for j := 0; j < len(xff); j++ { + if xff[j] == ',' { + return xff[:j] + } + } + return xff + } + } + + host, _, err := net.SplitHostPort(r.RemoteAddr) + if err != nil { + return r.RemoteAddr + } + return host +} diff --git a/backend/internal/api/ratelimiter_test.go b/backend/internal/api/ratelimiter_test.go new file mode 100644 index 0000000..abf0deb --- /dev/null +++ b/backend/internal/api/ratelimiter_test.go @@ -0,0 +1,189 @@ +package api + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "sync" + "testing" + "time" +) + +func TestRateLimiter(t *testing.T) { + t.Run("allows requests under limit", func(t *testing.T) { + rl := NewRateLimiter(5, time.Minute) + + for i := 0; i < 5; i++ { + if !rl.Allow("192.0.2.1") { + t.Errorf("request %d should be allowed", i+1) + } + } + }) + + t.Run("blocks requests over limit", func(t *testing.T) { + rl := NewRateLimiter(3, time.Minute) + + for i := 0; i < 3; i++ { + rl.Allow("192.0.2.1") + } + + if rl.Allow("192.0.2.1") { + t.Error("request over limit should be blocked") + } + }) + + t.Run("tracks different IPs independently", func(t *testing.T) { + rl := NewRateLimiter(2, time.Minute) + + rl.Allow("192.0.2.1") + rl.Allow("192.0.2.1") + + 
if rl.Allow("192.0.2.1") { + t.Error("IP 1 should be blocked") + } + if !rl.Allow("192.0.2.2") { + t.Error("IP 2 should be allowed") + } + }) + + t.Run("window expires and allows new requests", func(t *testing.T) { + rl := NewRateLimiter(1, 50*time.Millisecond) + + if !rl.Allow("192.0.2.1") { + t.Error("first request should be allowed") + } + if rl.Allow("192.0.2.1") { + t.Error("second request should be blocked") + } + + time.Sleep(60 * time.Millisecond) + + if !rl.Allow("192.0.2.1") { + t.Error("request after window expiry should be allowed") + } + }) + + t.Run("concurrent access is safe", func(t *testing.T) { + rl := NewRateLimiter(100, time.Minute) + + var wg sync.WaitGroup + for i := 0; i < 100; i++ { + wg.Add(1) + go func() { + defer wg.Done() + rl.Allow("192.0.2.1") + }() + } + wg.Wait() + + if rl.Allow("192.0.2.1") { + t.Error("should be blocked after 100 concurrent requests") + } + }) +} + +func TestRateLimitMiddleware(t *testing.T) { + t.Run("returns 429 when rate limited", func(t *testing.T) { + rl := NewRateLimiter(1, time.Minute) + inner := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }) + handler := RateLimitMiddleware(rl)(inner) + + // First request should succeed. + w1 := httptest.NewRecorder() + r1 := httptest.NewRequest(http.MethodPost, "/", nil) + r1.RemoteAddr = "192.0.2.1:12345" + handler.ServeHTTP(w1, r1) + + if w1.Code != http.StatusOK { + t.Errorf("first request: status = %d, want %d", w1.Code, http.StatusOK) + } + + // Second request should be rate limited. 
+ w2 := httptest.NewRecorder() + r2 := httptest.NewRequest(http.MethodPost, "/", nil) + r2.RemoteAddr = "192.0.2.1:12345" + handler.ServeHTTP(w2, r2) + + if w2.Code != http.StatusTooManyRequests { + t.Errorf("second request: status = %d, want %d", w2.Code, http.StatusTooManyRequests) + } + }) + + t.Run("sets Retry-After header on 429", func(t *testing.T) { + rl := NewRateLimiter(1, time.Minute) + inner := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }) + handler := RateLimitMiddleware(rl)(inner) + + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/", nil) + r.RemoteAddr = "192.0.2.1:12345" + handler.ServeHTTP(w, r) + + // Trigger rate limit. + w2 := httptest.NewRecorder() + r2 := httptest.NewRequest(http.MethodPost, "/", nil) + r2.RemoteAddr = "192.0.2.1:12345" + handler.ServeHTTP(w2, r2) + + retryAfter := w2.Header().Get("Retry-After") + if retryAfter == "" { + t.Error("expected Retry-After header on 429 response") + } + }) + + t.Run("returns JSON error body on 429", func(t *testing.T) { + rl := NewRateLimiter(1, time.Minute) + inner := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }) + handler := RateLimitMiddleware(rl)(inner) + + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/", nil) + r.RemoteAddr = "192.0.2.1:12345" + handler.ServeHTTP(w, r) + + w2 := httptest.NewRecorder() + r2 := httptest.NewRequest(http.MethodPost, "/", nil) + r2.RemoteAddr = "192.0.2.1:12345" + handler.ServeHTTP(w2, r2) + + var got map[string]string + if err := json.NewDecoder(w2.Body).Decode(&got); err != nil { + t.Fatalf("failed to decode response body: %v", err) + } + if got["error"] == "" { + t.Error("expected non-empty error message in 429 response body") + } + }) + + t.Run("extracts IP from X-Forwarded-For header", func(t *testing.T) { + rl := NewRateLimiter(1, time.Minute) + inner := http.HandlerFunc(func(w http.ResponseWriter, r 
*http.Request) { + w.WriteHeader(http.StatusOK) + }) + handler := RateLimitMiddleware(rl)(inner) + + // First request with X-Forwarded-For. + w1 := httptest.NewRecorder() + r1 := httptest.NewRequest(http.MethodPost, "/", nil) + r1.RemoteAddr = "10.0.0.1:12345" + r1.Header.Set("X-Forwarded-For", "203.0.113.1") + handler.ServeHTTP(w1, r1) + + // Second request from same forwarded IP. + w2 := httptest.NewRecorder() + r2 := httptest.NewRequest(http.MethodPost, "/", nil) + r2.RemoteAddr = "10.0.0.2:54321" + r2.Header.Set("X-Forwarded-For", "203.0.113.1") + handler.ServeHTTP(w2, r2) + + if w2.Code != http.StatusTooManyRequests { + t.Errorf("second request from same forwarded IP: status = %d, want %d", w2.Code, http.StatusTooManyRequests) + } + }) +} diff --git a/backend/internal/api/router.go b/backend/internal/api/router.go index 96ca0f2..864b2f1 100644 --- a/backend/internal/api/router.go +++ b/backend/internal/api/router.go @@ -12,19 +12,35 @@ import ( // defaultRequestTimeout is the default deadline for request processing. const defaultRequestTimeout = 60 * time.Second +// analyzeURLRateLimit is the number of analyze-url requests allowed per IP per window. +const analyzeURLRateLimit = 5 + +// analyzeURLRateWindow is the sliding window duration for rate limiting. +const analyzeURLRateWindow = time.Minute + // NewRouter creates and returns a fully configured HTTP handler with // registered routes and middleware. // Middleware order (outermost to innermost): CORS -> Request ID -> Timeout -> Logging -> routes. // If logger is nil, the default slog logger is used. // If cfg is nil, the health handler uses zero-value config defaults. -// If pipeline is nil, the analyze endpoint returns 503 Service Unavailable. +// If pipeline is nil, the analyze endpoints return 503 Service Unavailable. // qdrantCheck is an optional function for the health endpoint to verify Qdrant connectivity. 
func NewRouter(logger *slog.Logger, cfg *config.Config, pipeline PipelineRunner, qdrantCheck func(ctx context.Context) error) http.Handler { + if cfg == nil { + cfg = &config.Config{} + } + mux := http.NewServeMux() mux.HandleFunc("GET /api/v1/health", NewHealthHandler(cfg, qdrantCheck)) mux.HandleFunc("POST /api/v1/analyze", NewAnalyzeHandler(pipeline, logger)) + // analyze-url endpoint with per-IP rate limiting. + fetcher := NewFetcher() + rateLimiter := NewRateLimiter(analyzeURLRateLimit, analyzeURLRateWindow) + analyzeURLHandler := NewAnalyzeURLHandler(pipeline, fetcher, logger) + mux.Handle("POST /api/v1/analyze-url", RateLimitMiddleware(rateLimiter)(analyzeURLHandler)) + // Catch-all for unmatched routes to return JSON 404. mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { WriteError(w, http.StatusNotFound, "not found") @@ -34,7 +50,7 @@ func NewRouter(logger *slog.Logger, cfg *config.Config, pipeline PipelineRunner, handler = LoggingMiddleware(logger)(handler) handler = TimeoutMiddleware(defaultRequestTimeout)(handler) handler = RequestIDMiddleware(handler) - handler = CORSMiddleware(handler) + handler = CORSMiddleware(cfg.CORSAllowedOrigins)(handler) return handler } diff --git a/backend/internal/api/urlvalidator.go b/backend/internal/api/urlvalidator.go new file mode 100644 index 0000000..e3f856f --- /dev/null +++ b/backend/internal/api/urlvalidator.go @@ -0,0 +1,77 @@ +package api + +import ( + "fmt" + "net" + "net/url" + "strings" +) + +// ValidateURL checks that rawURL is a safe, publicly-accessible HTTP(S) URL. +// It rejects private IPs, loopback, link-local, and non-HTTP schemes to prevent SSRF. +func ValidateURL(rawURL string) error { + if rawURL == "" { + return fmt.Errorf("url is required") + } + + parsed, err := url.Parse(rawURL) + if err != nil { + return fmt.Errorf("invalid url: %w", err) + } + + // Only allow http and https schemes. 
+ scheme := strings.ToLower(parsed.Scheme) + if scheme != "http" && scheme != "https" { + return fmt.Errorf("unsupported scheme %q: only http and https are allowed", parsed.Scheme) + } + + hostname := parsed.Hostname() + if hostname == "" { + return fmt.Errorf("url must include a hostname") + } + + // Reject localhost by name. + if strings.EqualFold(hostname, "localhost") { + return fmt.Errorf("localhost is not allowed") + } + + // Resolve hostname to IPs and validate each one. + ips, err := net.LookupHost(hostname) + if err != nil { + // If it's already an IP literal, validate directly. + ip := net.ParseIP(hostname) + if ip == nil { + return fmt.Errorf("cannot resolve hostname %q: %w", hostname, err) + } + return validateIP(ip) + } + + for _, ipStr := range ips { + ip := net.ParseIP(ipStr) + if ip == nil { + continue + } + if err := validateIP(ip); err != nil { + return err + } + } + + return nil +} + +// validateIP rejects private, loopback, link-local, and unspecified IP addresses. +func validateIP(ip net.IP) error { + if ip.IsLoopback() { + return fmt.Errorf("loopback address %s is not allowed", ip) + } + if ip.IsPrivate() { + return fmt.Errorf("private address %s is not allowed", ip) + } + if ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return fmt.Errorf("link-local address %s is not allowed", ip) + } + if ip.IsUnspecified() { + return fmt.Errorf("unspecified address %s is not allowed", ip) + } + return nil +} diff --git a/backend/internal/api/urlvalidator_test.go b/backend/internal/api/urlvalidator_test.go new file mode 100644 index 0000000..a2581ce --- /dev/null +++ b/backend/internal/api/urlvalidator_test.go @@ -0,0 +1,119 @@ +package api + +import ( + "testing" +) + +func TestValidateURL(t *testing.T) { + t.Run("accepts valid HTTPS URL", func(t *testing.T) { + if err := ValidateURL("https://example.com/privacy"); err != nil { + t.Errorf("unexpected error for valid HTTPS URL: %v", err) + } + }) + + t.Run("accepts valid HTTP URL", func(t 
*testing.T) { + if err := ValidateURL("http://example.com/privacy"); err != nil { + t.Errorf("unexpected error for valid HTTP URL: %v", err) + } + }) + + t.Run("rejects empty URL", func(t *testing.T) { + if err := ValidateURL(""); err == nil { + t.Error("expected error for empty URL") + } + }) + + t.Run("rejects URL without hostname", func(t *testing.T) { + if err := ValidateURL("http:///path"); err == nil { + t.Error("expected error for URL without hostname") + } + }) + + t.Run("rejects non-HTTP schemes", func(t *testing.T) { + schemes := []string{ + "file:///etc/passwd", + "ftp://example.com/file", + "data:text/html,

hi

", + "javascript:alert(1)", + "gopher://example.com", + } + for _, u := range schemes { + if err := ValidateURL(u); err == nil { + t.Errorf("expected error for scheme in %q", u) + } + } + }) + + t.Run("rejects private IP 10.x.x.x", func(t *testing.T) { + urls := []string{ + "http://10.0.0.1/page", + "http://10.255.255.255/page", + } + for _, u := range urls { + if err := ValidateURL(u); err == nil { + t.Errorf("expected error for private IP in %q", u) + } + } + }) + + t.Run("rejects private IP 172.16-31.x.x", func(t *testing.T) { + urls := []string{ + "http://172.16.0.1/page", + "http://172.31.255.255/page", + } + for _, u := range urls { + if err := ValidateURL(u); err == nil { + t.Errorf("expected error for private IP in %q", u) + } + } + }) + + t.Run("rejects private IP 192.168.x.x", func(t *testing.T) { + if err := ValidateURL("http://192.168.1.1/page"); err == nil { + t.Error("expected error for private IP 192.168.x.x") + } + }) + + t.Run("rejects loopback 127.x.x.x", func(t *testing.T) { + urls := []string{ + "http://127.0.0.1/page", + "http://127.0.0.2/page", + } + for _, u := range urls { + if err := ValidateURL(u); err == nil { + t.Errorf("expected error for loopback in %q", u) + } + } + }) + + t.Run("rejects localhost", func(t *testing.T) { + if err := ValidateURL("http://localhost:8080/page"); err == nil { + t.Error("expected error for localhost") + } + }) + + t.Run("rejects IPv6 loopback", func(t *testing.T) { + if err := ValidateURL("http://[::1]/page"); err == nil { + t.Error("expected error for IPv6 loopback") + } + }) + + t.Run("rejects link-local 169.254.x.x", func(t *testing.T) { + if err := ValidateURL("http://169.254.1.1/page"); err == nil { + t.Error("expected error for link-local address") + } + }) + + t.Run("rejects 0.0.0.0", func(t *testing.T) { + if err := ValidateURL("http://0.0.0.0/page"); err == nil { + t.Error("expected error for 0.0.0.0") + } + }) + + t.Run("accepts public IP", func(t *testing.T) { + // 8.8.8.8 is a public DNS IP + if 
err := ValidateURL("http://8.8.8.8/page"); err != nil { + t.Errorf("unexpected error for public IP: %v", err) + } + }) +} diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go index 8fd6052..40abeec 100644 --- a/backend/internal/config/config.go +++ b/backend/internal/config/config.go @@ -8,12 +8,13 @@ import ( // Config holds all configuration values loaded from environment variables. type Config struct { - Port string - LogLevel string - AnthropicAPIKey string - OpenAIAPIKey string - QdrantURL string - CacheDefaultTTL string + Port string + LogLevel string + AnthropicAPIKey string + OpenAIAPIKey string + QdrantURL string + CacheDefaultTTL string + CORSAllowedOrigins string } // Load reads configuration from environment variables, applies defaults for @@ -25,7 +26,8 @@ func Load() (*Config, error) { AnthropicAPIKey: os.Getenv("ANTHROPIC_API_KEY"), OpenAIAPIKey: os.Getenv("OPENAI_API_KEY"), QdrantURL: getEnvOrDefault("QDRANT_URL", "localhost:6334"), - CacheDefaultTTL: getEnvOrDefault("CACHE_DEFAULT_TTL", "720h"), + CacheDefaultTTL: getEnvOrDefault("CACHE_DEFAULT_TTL", "720h"), + CORSAllowedOrigins: getEnvOrDefault("CORS_ALLOWED_ORIGINS", "*"), } var missing []string diff --git a/backend/internal/types/request.go b/backend/internal/types/request.go new file mode 100644 index 0000000..5143059 --- /dev/null +++ b/backend/internal/types/request.go @@ -0,0 +1,6 @@ +package types + +// AnalyzeURLRequest represents the JSON body for the analyze-url endpoint. +type AnalyzeURLRequest struct { + URL string `json:"url"` +} From c3d3ddf823e67c8bb4062859bd5225f0db83e4af Mon Sep 17 00:00:00 2001 From: Parth576 Date: Sun, 1 Mar 2026 00:16:34 -0500 Subject: [PATCH 3/5] feat(website): add URL analysis form with API integration and results display Replace the try-it section placeholder with a functional URL analysis feature that lets users paste a privacy policy URL and get scored results without installing the browser extension. 
Includes client-side URL validation, loading states, comprehensive error handling (network, fetch failure, rate limiting, non-policy content), and responsive results rendering with risk-level color coding matching the scoring system spec. Assisted by the code-assist SOP --- .../website-url-analysis/context.md | 75 +++++ .../website-url-analysis/plan.md | 73 +++++ .../website-url-analysis/progress.md | 38 +++ website/index.html | 88 +++++- website/script.js | 287 ++++++++++++++++++ 5 files changed, 556 insertions(+), 5 deletions(-) create mode 100644 .agents/scratchpad/2026-02-15-smolterms/website-url-analysis/context.md create mode 100644 .agents/scratchpad/2026-02-15-smolterms/website-url-analysis/plan.md create mode 100644 .agents/scratchpad/2026-02-15-smolterms/website-url-analysis/progress.md diff --git a/.agents/scratchpad/2026-02-15-smolterms/website-url-analysis/context.md b/.agents/scratchpad/2026-02-15-smolterms/website-url-analysis/context.md new file mode 100644 index 0000000..4ad5fd6 --- /dev/null +++ b/.agents/scratchpad/2026-02-15-smolterms/website-url-analysis/context.md @@ -0,0 +1,75 @@ +# Context: Website URL Analysis Feature + +## Project Structure +- `website/index.html` - Landing page using Tailwind CSS CDN, Inter font, vanilla JS +- `website/script.js` - Mobile menu toggle + smooth scrolling (28 lines) +- No `styles.css` - all styling via Tailwind utility classes inline +- No build system - plain static files served directly + +## Requirements Summary + +### Functional +1. Replace the `
` placeholder (line 279-290) with: + - URL input field with placeholder text + - "Analyze" submit button + - Results container (hidden by default) +2. Form submission flow: + - Client-side URL validation (basic format check) + - Loading state (spinner, disabled button, progress text) + - POST to `{API_BASE_URL}/api/v1/analyze-url` with `{ "url": "..." }` + - Render results or errors +3. Results display: overall score + risk badge, 5 dimension scores, key concerns list, summary, cached indicator +4. Error handling: network errors, fetch failures (suggest extension), rate limiting (429), invalid URL, non-policy content +5. Configurable API base URL (default `http://localhost:8080`) +6. Smooth scroll to results after analysis + +### Backend API Contract +- **Request:** `POST /api/v1/analyze-url` with `{ "url": "..." }` +- **Success Response (200):** `AnalysisResult` struct: + ```json + { + "url": "...", + "overall_score": 7.2, + "risk_level": "moderate", // "low"|"moderate"|"high"|"critical"|"not_policy" + "dimensions": { + "data_collection": { "score": 7.0, "summary": "..." }, + "data_sharing": { "score": 6.5, "summary": "..." }, + "user_rights": { "score": 8.0, "summary": "..." }, + "retention": { "score": 7.5, "summary": "..." }, + "security": { "score": 7.0, "summary": "..." } + }, + "key_concerns": ["concern1", "concern2"], + "summary": "...", + "cached": false, + "analyzed_at": "2026-02-15T..." + } + ``` +- **Error Response:** `{ "error": "..." 
}` with appropriate HTTP status codes + - 400: Invalid request / empty URL + - 429: Rate limited (5 req/min/IP) + - 502: Fetch failure / analysis failure + - 504: Analysis timeout + +### Risk Level Colors (from Tailwind config) +- Low (8-10): `risk-low` (#4ade80 green) +- Moderate (5-7.9): `risk-moderate` (#facc15 yellow) +- High (3-4.9): `risk-high` (#fb923c orange) +- Critical (1-2.9): `risk-critical` (#f87171 red) + +## Existing Patterns +- Website uses Tailwind CSS CDN - no custom CSS file, all utility classes +- Existing sections use `max-w-3xl` or `max-w-5xl` with `mx-auto`, `px-6`, `py-20` +- Cards use `bg-cream-100 rounded-xl p-5/p-6 border border-cream-300` pattern +- Colors from custom cream palette (cream-50 through cream-900) +- Risk colors already defined in Tailwind config +- JS is vanilla, no framework, no modules +- Try-it section currently has `max-w-3xl mx-auto text-center` layout + +## Implementation Path +1. Edit `website/index.html` - Replace try-it placeholder with form HTML + results container +2. Edit `website/script.js` - Add all JS logic (validation, API calls, rendering, error handling) +3. Add inline ` @@ -276,15 +280,89 @@

Risk Levels

- +
-

Try It Out

+

Analyze a Privacy Policy

- Paste a URL to analyze its privacy policy directly from this page. + Don't want to install the extension? Paste a privacy policy URL below and get the same analysis.

-
-

URL analysis feature coming soon.

+ + +
+ + +
+ + + + + + + + + + + +
diff --git a/website/script.js b/website/script.js index 6228274..a280d85 100644 --- a/website/script.js +++ b/website/script.js @@ -26,3 +26,290 @@ document.querySelectorAll('a[href^="#"]').forEach(anchor => { } }); }); + +// --------------------------------------------------------------------------- +// URL Analysis Feature +// --------------------------------------------------------------------------- + +const API_BASE = 'http://localhost:8080'; + +// DOM references +const analyzeForm = document.getElementById('analyze-form'); +const urlInput = document.getElementById('url-input'); +const analyzeBtn = document.getElementById('analyze-btn'); +const validationMsg = document.getElementById('validation-msg'); +const loadingState = document.getElementById('loading-state'); +const errorState = document.getElementById('error-state'); +const errorMessage = document.getElementById('error-message'); +const errorSuggestion = document.getElementById('error-suggestion'); +const resultsState = document.getElementById('results-state'); +const overallScore = document.getElementById('overall-score'); +const riskBadge = document.getElementById('risk-badge'); +const cachedBadge = document.getElementById('cached-badge'); +const dimensionScores = document.getElementById('dimension-scores'); +const summaryText = document.getElementById('summary-text'); +const concernsList = document.getElementById('concerns-list'); +const analyzeAnotherBtn = document.getElementById('analyze-another-btn'); + +// Dimension key -> display label +const DIMENSION_LABELS = { + data_collection: 'Data Collection', + data_sharing: 'Data Sharing', + user_rights: 'User Rights', + retention: 'Retention', + security: 'Security', +}; + +// Risk level -> Tailwind classes for the badge +const RISK_STYLES = { + low: { bg: 'bg-risk-low', text: 'text-green-900', label: 'Low Risk' }, + moderate: { bg: 'bg-risk-moderate', text: 'text-yellow-900', label: 'Moderate Risk' }, + high: { bg: 'bg-risk-high', text: 
'text-orange-900', label: 'High Risk' }, + critical: { bg: 'bg-risk-critical', text: 'text-red-900', label: 'Critical Risk' }, +}; + +// Score -> Tailwind text color for dimension score numbers +function getScoreColor(score) { + if (score >= 8) return 'text-green-600'; + if (score >= 5) return 'text-yellow-600'; + if (score >= 3) return 'text-orange-500'; + return 'text-red-500'; +} + +// --------------------------------------------------------------------------- +// State management +// --------------------------------------------------------------------------- + +function resetState() { + validationMsg.classList.add('hidden'); + validationMsg.textContent = ''; + loadingState.classList.add('hidden'); + errorState.classList.add('hidden'); + resultsState.classList.add('hidden'); + errorSuggestion.classList.add('hidden'); +} + +function showLoading() { + resetState(); + loadingState.classList.remove('hidden'); + analyzeBtn.disabled = true; + analyzeBtn.textContent = 'Analyzing...'; +} + +function hideLoading() { + loadingState.classList.add('hidden'); + analyzeBtn.disabled = false; + analyzeBtn.textContent = 'Analyze'; +} + +function showValidation(message) { + resetState(); + validationMsg.textContent = message; + validationMsg.classList.remove('hidden'); +} + +function showError(message, suggestion) { + hideLoading(); + errorState.classList.remove('hidden'); + errorMessage.textContent = message; + if (suggestion) { + errorSuggestion.textContent = suggestion; + errorSuggestion.classList.remove('hidden'); + } + scrollToElement(errorState); +} + +function scrollToElement(el) { + // Small delay to ensure DOM is painted before scrolling + setTimeout(() => { + el.scrollIntoView({ behavior: 'smooth', block: 'start' }); + }, 100); +} + +// --------------------------------------------------------------------------- +// URL validation +// --------------------------------------------------------------------------- + +function isValidURL(str) { + try { + const url = new 
URL(str); + return url.protocol === 'http:' || url.protocol === 'https:'; + } catch { + return false; + } +} + +// --------------------------------------------------------------------------- +// API call +// --------------------------------------------------------------------------- + +async function analyzeURL(url) { + const response = await fetch(`${API_BASE}/api/v1/analyze-url`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ url }), + }); + + const data = await response.json(); + + if (!response.ok) { + const error = new Error(data.error || 'An unexpected error occurred'); + error.status = response.status; + throw error; + } + + return data; +} + +// --------------------------------------------------------------------------- +// Results rendering +// --------------------------------------------------------------------------- + +function renderResults(data) { + hideLoading(); + + // Handle not_policy as a special case + if (data.risk_level === 'not_policy') { + showError( + "This page doesn't appear to be a privacy policy.", + 'Try submitting a direct link to a privacy policy or terms of service page.' 
+ ); + return; + } + + // Overall score + overallScore.textContent = data.overall_score.toFixed(1); + overallScore.className = `text-4xl font-bold ${getScoreColor(data.overall_score)}`; + + // Risk badge + const risk = RISK_STYLES[data.risk_level] || RISK_STYLES.moderate; + riskBadge.textContent = risk.label; + riskBadge.className = `px-4 py-1.5 rounded-full text-sm font-semibold ${risk.bg} ${risk.text}`; + + // Cached badge + if (data.cached) { + cachedBadge.classList.remove('hidden'); + } else { + cachedBadge.classList.add('hidden'); + } + + // Dimension scores + dimensionScores.innerHTML = ''; + const dimensionOrder = ['data_collection', 'data_sharing', 'user_rights', 'retention', 'security']; + for (const key of dimensionOrder) { + const dim = data.dimensions[key]; + if (!dim) continue; + + const card = document.createElement('div'); + card.className = 'bg-cream-100 rounded-xl p-5 border border-cream-300 text-left'; + card.innerHTML = ` +
+

${DIMENSION_LABELS[key] || key}

+ ${dim.score.toFixed(1)} +
+

${escapeHTML(dim.summary)}

+ `; + dimensionScores.appendChild(card); + } + + // Summary + summaryText.textContent = data.summary || ''; + + // Key concerns + concernsList.innerHTML = ''; + if (data.key_concerns && data.key_concerns.length > 0) { + for (const concern of data.key_concerns) { + const li = document.createElement('li'); + li.className = 'flex items-start gap-2 text-sm text-cream-700'; + li.innerHTML = ` + + + + ${escapeHTML(concern)} + `; + concernsList.appendChild(li); + } + } else { + concernsList.innerHTML = '
  • No major concerns identified.
  • '; + } + + resultsState.classList.remove('hidden'); + scrollToElement(document.getElementById('results-header')); +} + +// Escape HTML to prevent XSS from API response content +function escapeHTML(str) { + const div = document.createElement('div'); + div.textContent = str; + return div.innerHTML; +} + +// --------------------------------------------------------------------------- +// Error handling +// --------------------------------------------------------------------------- + +function handleError(err) { + if (err.status === 429) { + showError('Too many requests. Please wait a moment and try again.'); + return; + } + + if (err.status === 502) { + showError( + err.message || 'Failed to fetch the page for analysis.', + 'Some websites block automated access. Try using the SmolTerms browser extension instead for full compatibility.' + ); + return; + } + + if (err.status === 504) { + showError('Analysis timed out. The page may be too large. Please try again.'); + return; + } + + // Network error (no status) or other unexpected errors + if (!err.status) { + showError('Could not reach the analysis server. Please check that the backend is running and try again.'); + return; + } + + // Generic error with backend message + showError(err.message || 'An unexpected error occurred. 
Please try again.'); +} + +// --------------------------------------------------------------------------- +// Form submission +// --------------------------------------------------------------------------- + +analyzeForm.addEventListener('submit', async (e) => { + e.preventDefault(); + + const url = urlInput.value.trim(); + + if (!url) { + showValidation('Please enter a URL.'); + return; + } + + if (!isValidURL(url)) { + showValidation('Please enter a valid URL (e.g., https://example.com/privacy).'); + return; + } + + showLoading(); + + try { + const result = await analyzeURL(url); + renderResults(result); + } catch (err) { + handleError(err); + } +}); + +// "Analyze another" resets the form to initial state +analyzeAnotherBtn.addEventListener('click', () => { + resetState(); + urlInput.value = ''; + urlInput.focus(); + document.getElementById('try-it').scrollIntoView({ behavior: 'smooth', block: 'start' }); +}); From 5150bc20fbcb43758a7332eaff68b0e8ec561923 Mon Sep 17 00:00:00 2001 From: Parth576 Date: Wed, 4 Mar 2026 22:38:13 -0500 Subject: [PATCH 4/5] feat(config): make rate limit configurable via env vars Add RATE_LIMIT_REQUESTS and RATE_LIMIT_WINDOW environment variables to configure the per-IP rate limiter, defaulting to 5 requests per minute to preserve existing behavior. --- backend/internal/api/router.go | 16 +++++++++------- backend/internal/config/config.go | 25 ++++++++++++++++++++----- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/backend/internal/api/router.go b/backend/internal/api/router.go index 864b2f1..6ca3064 100644 --- a/backend/internal/api/router.go +++ b/backend/internal/api/router.go @@ -12,12 +12,6 @@ import ( // defaultRequestTimeout is the default deadline for request processing. const defaultRequestTimeout = 60 * time.Second -// analyzeURLRateLimit is the number of analyze-url requests allowed per IP per window. -const analyzeURLRateLimit = 5 - -// analyzeURLRateWindow is the sliding window duration for rate limiting. 
-const analyzeURLRateWindow = time.Minute - // NewRouter creates and returns a fully configured HTTP handler with // registered routes and middleware. // Middleware order (outermost to innermost): CORS -> Request ID -> Timeout -> Logging -> routes. @@ -37,7 +31,15 @@ func NewRouter(logger *slog.Logger, cfg *config.Config, pipeline PipelineRunner, // analyze-url endpoint with per-IP rate limiting. fetcher := NewFetcher() - rateLimiter := NewRateLimiter(analyzeURLRateLimit, analyzeURLRateWindow) + rateLimit := cfg.RateLimitRequests + if rateLimit == 0 { + rateLimit = 5 + } + rateWindow := cfg.RateLimitWindow + if rateWindow == 0 { + rateWindow = time.Minute + } + rateLimiter := NewRateLimiter(rateLimit, rateWindow) analyzeURLHandler := NewAnalyzeURLHandler(pipeline, fetcher, logger) mux.Handle("POST /api/v1/analyze-url", RateLimitMiddleware(rateLimiter)(analyzeURLHandler)) diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go index 40abeec..6281783 100644 --- a/backend/internal/config/config.go +++ b/backend/internal/config/config.go @@ -3,7 +3,9 @@ package config import ( "fmt" "os" + "strconv" "strings" + "time" ) // Config holds all configuration values loaded from environment variables. @@ -15,19 +17,32 @@ type Config struct { QdrantURL string CacheDefaultTTL string CORSAllowedOrigins string + RateLimitRequests int + RateLimitWindow time.Duration } // Load reads configuration from environment variables, applies defaults for // optional fields, and returns an error listing any missing required variables. 
func Load() (*Config, error) { + rateLimitRequests, err := strconv.Atoi(getEnvOrDefault("RATE_LIMIT_REQUESTS", "5")) + if err != nil { + return nil, fmt.Errorf("invalid RATE_LIMIT_REQUESTS: %w", err) + } + rateLimitWindow, err := time.ParseDuration(getEnvOrDefault("RATE_LIMIT_WINDOW", "1m")) + if err != nil { + return nil, fmt.Errorf("invalid RATE_LIMIT_WINDOW: %w", err) + } + cfg := &Config{ - Port: getEnvOrDefault("PORT", "8080"), - LogLevel: getEnvOrDefault("LOG_LEVEL", "info"), - AnthropicAPIKey: os.Getenv("ANTHROPIC_API_KEY"), - OpenAIAPIKey: os.Getenv("OPENAI_API_KEY"), - QdrantURL: getEnvOrDefault("QDRANT_URL", "localhost:6334"), + Port: getEnvOrDefault("PORT", "8080"), + LogLevel: getEnvOrDefault("LOG_LEVEL", "info"), + AnthropicAPIKey: os.Getenv("ANTHROPIC_API_KEY"), + OpenAIAPIKey: os.Getenv("OPENAI_API_KEY"), + QdrantURL: getEnvOrDefault("QDRANT_URL", "localhost:6334"), CacheDefaultTTL: getEnvOrDefault("CACHE_DEFAULT_TTL", "720h"), CORSAllowedOrigins: getEnvOrDefault("CORS_ALLOWED_ORIGINS", "*"), + RateLimitRequests: rateLimitRequests, + RateLimitWindow: rateLimitWindow, } var missing []string From 1c10d44d45e83d3d37e9eacf7fc12ea1dcb86985 Mon Sep 17 00:00:00 2001 From: Parth576 Date: Wed, 4 Mar 2026 22:59:15 -0500 Subject: [PATCH 5/5] feat(api): apply shared rate limiter to /analyze endpoint Both /analyze and /analyze-url now share the same per-IP rate limiter so requests to either endpoint count toward a single budget. 
--- backend/internal/api/router.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/backend/internal/api/router.go b/backend/internal/api/router.go index 6ca3064..d7d95c2 100644 --- a/backend/internal/api/router.go +++ b/backend/internal/api/router.go @@ -26,11 +26,7 @@ func NewRouter(logger *slog.Logger, cfg *config.Config, pipeline PipelineRunner, mux := http.NewServeMux() - mux.HandleFunc("GET /api/v1/health", NewHealthHandler(cfg, qdrantCheck)) - mux.HandleFunc("POST /api/v1/analyze", NewAnalyzeHandler(pipeline, logger)) - - // analyze-url endpoint with per-IP rate limiting. - fetcher := NewFetcher() + // Per-IP rate limiter shared across analyze endpoints. rateLimit := cfg.RateLimitRequests if rateLimit == 0 { rateLimit = 5 @@ -40,6 +36,11 @@ func NewRouter(logger *slog.Logger, cfg *config.Config, pipeline PipelineRunner, rateWindow = time.Minute } rateLimiter := NewRateLimiter(rateLimit, rateWindow) + + mux.HandleFunc("GET /api/v1/health", NewHealthHandler(cfg, qdrantCheck)) + mux.Handle("POST /api/v1/analyze", RateLimitMiddleware(rateLimiter)(http.HandlerFunc(NewAnalyzeHandler(pipeline, logger)))) + + fetcher := NewFetcher() analyzeURLHandler := NewAnalyzeURLHandler(pipeline, fetcher, logger) mux.Handle("POST /api/v1/analyze-url", RateLimitMiddleware(rateLimiter)(analyzeURLHandler))