CoreNovus · jieyao-MilestoneHub · May 3, 2026 · May 2, 2026 · May 2, 2026 · May 2, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,54 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+# Cancel in-flight runs on the same ref when a new commit lands.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: test (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        # Match the supported range declared in pyproject.toml
+        # (``requires-python = ">=3.11,<4.0"`` + classifier list).
+        python-version: ["3.11", "3.12"]
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install Poetry
+        run: pipx install poetry
+
+      - name: Cache Poetry virtualenv
+        uses: actions/cache@v4  # key carries the exact interpreter path so a Python patch bump starts a fresh venv
+        with:
+          path: ~/.cache/pypoetry/virtualenvs
+          key: poetry-${{ runner.os }}-${{ env.pythonLocation }}-${{ hashFiles('poetry.lock') }}
+
+      - name: Install dependencies
+        run: poetry install --with dev --no-interaction --no-ansi
+
+      - name: Lint (ruff)
+        run: poetry run ruff check llm_gateway/ tests/
+
+      - name: Format check (black)
+        run: poetry run black --check llm_gateway/ tests/
+
+      - name: Type check (pyright)
+        run: poetry run pyright llm_gateway/
+
+      - name: Tests (pytest)
+        run: poetry run pytest tests/unit/ -v
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,45 @@
+# Pre-commit hooks — CI's ruff + black gates run locally, plus hygiene and secret scans.
+#
+# Install once per checkout:
+#     pre-commit install
+#
+# Run all hooks ad hoc:
+#     pre-commit run --all-files
+#
+# Versions pinned to exact release tags so contributors get reproducible
+# behaviour; bump deliberately rather than tracking ``main``.
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-toml
+      - id: check-added-large-files
+        args: ["--maxkb=512"]
+      - id: check-merge-conflict
+      - id: mixed-line-ending
+        args: ["--fix=lf"]
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Match the ruff version pinned in pyproject.toml's dev-deps so
+    # local + CI behaviour does not drift between operators.
+    rev: v0.7.0
+    hooks:
+      - id: ruff
+        args: ["--fix"]
+
+  - repo: https://github.com/psf/black-pre-commit-mirror
+    rev: 26.3.1
+    hooks:
+      - id: black
+
+  - repo: https://github.com/Yelp/detect-secrets
+    rev: v1.5.0
+    hooks:
+      - id: detect-secrets
+        args: ["--baseline", ".secrets.baseline"]
+        # Skip the baseline file itself. NOTE(review): does not cover a missing baseline — confirm.
+        exclude: ^\.secrets\.baseline$
diff --git a/NOTICE b/NOTICE
@@ -0,0 +1,16 @@
+llm-gateway
+Copyright 2026 CoreNovus contributors
+
+Licensed under the MIT License — see LICENSE for the full text.
+
+This product is a thin FastAPI proxy in front of vLLM, an open-source
+LLM serving engine (https://github.com/vllm-project/vllm) developed
+by the vLLM project and licensed under the Apache License, Version 2.0:
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+This product does not redistribute the vLLM engine itself; vLLM is
+expected to be supplied by the operator (vendor Docker container) at
+deploy time. References to vLLM in code, documentation, and examples
+are nominative and do not imply endorsement by or affiliation with
+the vLLM project.
diff --git a/SECURITY.md b/SECURITY.md
@@ -0,0 +1,80 @@
+# Security policy
+
+## Reporting a vulnerability
+
+Please report suspected security vulnerabilities **privately** via
+[GitHub Security Advisories][advisories].
+
+[advisories]: https://github.com/CoreNovus/llm-gateway/security/advisories/new
+
+Do **not** open a public GitHub issue for security reports.
+
+We aim to acknowledge within 5 business days. Please include:
+
+- A clear description of the vulnerability and its impact
+- Reproduction steps or a minimal proof-of-concept
+- Affected versions / commits / deployment shape (loopback vs. exposed)
+
+If a public exploit is already in circulation, say so in the report —
+we shorten the disclosure window in that case.
+
+## Disclosure policy
+
+We follow a 90-day coordinated disclosure window:
+
+1. Day 0 — report received, triage begins
+2. Within 5 business days — acknowledgement + initial severity assessment shared with reporter
+3. By day 90 — fix released, advisory published, reporter credited (if desired)
+
+If the fix lands sooner, the advisory ships with the release. If
+active exploitation is observed, the window is shortened by mutual
+agreement with the reporter.
+
+## Scope
+
+**In scope:**
+
+- Source code under `llm_gateway/`
+- Container / systemd artefacts under `deploy/`
+- Pinned dependencies in `pyproject.toml` / `poetry.lock`
+
+**Out of scope** (please report to upstream):
+
+- vLLM (`https://github.com/vllm-project/vllm`) — engine internals
+- FastAPI / Starlette / httpx / pydantic / uvicorn — framework dependencies
+- Operator-controlled configuration. Empty `BEARER_TOKEN` disables
+  auth; this is documented behaviour for local dev and is gated by
+  `__main__._require_bearer_token` on the production entry path.
+
+## Known limitations (documented, not vulnerabilities)
+
+The following are accepted trade-offs rather than bugs. Filing them
+as vulnerabilities is welcome but the response will point back here:
+
+- The `vllm_upstream_url` SSRF blocklist only checks IPv4 literals
+  against a known-metadata-IP set. Hostnames that resolve to those
+  IPs (DNS rebinding) are not caught at config-load time; the httpx
+  transport must enforce this on its own if the threat model
+  requires it.
+- Chunked transfer-encoding requests are refused with 411 Length
+  Required rather than length-counted at the ASGI layer. Honest JSON
+  clients (httpx, openai-python, langchain-openai, curl) always set
+  `Content-Length` so the practical impact is zero. Operators who
+  must accept chunked uploads need a proxy in front that materialises
+  `Content-Length`, or to extend `BodySizeLimitMiddleware` to a
+  streaming ASGI implementation.
+- The gateway binds to `127.0.0.1` by default and assumes an SSH
+  tunnel as the network-level boundary. Lifting the bind to
+  `0.0.0.0` without re-reading the threat model is an operator
+  misconfiguration, not a gateway vulnerability.
+
+## Hardening recommendations for operators
+
+Beyond the in-process defences this package ships with, deploy-time
+hardening lives in `deploy/`:
+
+- Container hardening: `cap_drop: ALL`, `no-new-privileges`,
+  `read_only` rootfs, non-root UID 1001.
+- Systemd unit: same posture for non-container deploys.
+- Operator-side `--limit-max-requests` / kernel-level limits cover
+  HTTP-protocol-level abuse below this middleware's reach.
diff --git a/llm_gateway/api/chat_completions.py b/llm_gateway/api/chat_completions.py
@@ -23,7 +23,7 @@
 # deferred-string evaluation). Same constraint as ``api/health.py``.
 
 from collections.abc import Callable
-from typing import Annotated
+from typing import Annotated, Any
 
 from fastapi import APIRouter, Depends, status
 from fastapi.responses import JSONResponse, Response, StreamingResponse
@@ -134,10 +134,17 @@ def _record_token_usage(metrics: Metrics, model: str, response: dict) -> None:
         metrics.tokens_completion_total.labels(model=model).inc(completion)
 
 
-def _safe_token_count(value: object) -> int:
+def _safe_token_count(value: Any) -> int:
     """Coerce an upstream token-count field to a non-negative ``int``.
 
     Returns 0 for ``None`` / missing / non-numeric / negative values.
+
+    Parameter is typed ``Any`` (not ``object``) because the value comes
+    straight from a parsed-JSON upstream payload — pyright rejects
+    ``int(object | Literal[0])`` since plain ``object`` does not
+    conform to ``ConvertibleToInt`` (str | Buffer | SupportsInt |
+    SupportsIndex). The ``try/except`` already covers every runtime
+    shape, so the static type can be honest about that.
     """
     try:
         count = int(value or 0)

diff --git a/tests/unit/test_rate_limit.py b/tests/unit/test_rate_limit.py
@@ -179,7 +179,9 @@ def test_middleware_passes_request_when_limiter_allows() -> None:
     assert response.status_code == 200
     # The bearer is hashed before becoming the bucket key — the
     # plaintext token must not appear in the bucket dict.
-    expected_digest = hashlib.sha256(b"t1").hexdigest()[:16]
+    # 32 hex chars (128 bits) — bumped from 16 to close a token-grinding
+    # collision attack. See PR #6 / commit f40049e.
+    expected_digest = hashlib.sha256(b"t1").hexdigest()[:32]
     assert limiter.calls == [f"token:{expected_digest}"]