diff --git a/LIBRARY_SUMMARY.md b/LIBRARY_SUMMARY.md deleted file mode 100644 index b656d2e..0000000 --- a/LIBRARY_SUMMARY.md +++ /dev/null @@ -1,462 +0,0 @@ -# Loclean — Library Technical Summary - -> **Version**: 0.2.2 | **Python**: ≥3.10 | **License**: Apache-2.0 -> **One-liner**: High-performance, local-first semantic data cleaning library powered by Ollama LLMs. - -## Purpose - -Loclean is an **AI-powered data cleaning and PII scrubbing library** that uses a locally-running [Ollama](https://ollama.com) instance for inference. It provides three core capabilities: - -1. **`clean()`** — Semantic column cleaning on DataFrames (extract numeric values + units from messy text) -2. **`scrub()`** — PII detection and masking/replacement in text or DataFrames -3. **`extract()`** — Structured data extraction from text using user-defined Pydantic schemas - -All DataFrame operations are **backend-agnostic** via [Narwhals](https://narwhals-dev.github.io/narwhals/), supporting pandas, Polars, PyArrow, cuDF, and Modin interchangeably. 
- ---- - -## Architecture Overview - -```mermaid -graph TB - subgraph "Public API (loclean/__init__.py)" - Loclean["Loclean class"] - clean["clean()"] - scrub["scrub()"] - extract["extract()"] - get_engine["get_engine()"] - end - - subgraph "Inference Layer" - ABC["InferenceEngine (ABC)"] - Ollama["OllamaEngine"] - Config["EngineConfig"] - Factory["create_engine()"] - end - - subgraph "Extraction Layer" - Extractor["Extractor"] - ExtractDF["extract_dataframe()"] - JsonRepair["json_repair"] - end - - subgraph "Privacy Layer" - PIIDetector["PIIDetector"] - RegexDet["RegexDetector"] - LLMDet["LLMDetector"] - Scrub["scrub_string() / scrub_dataframe()"] - FakeGen["FakeDataGenerator"] - end - - subgraph "Engine Layer" - NarwhalsEng["NarwhalsEngine"] - end - - subgraph "Shared" - Cache["LocleanCache (SQLite)"] - Schemas["Pydantic Schemas"] - end - - Loclean --> Ollama - Loclean --> Extractor - clean --> get_engine --> Ollama - clean --> NarwhalsEng - scrub --> Scrub --> PIIDetector - extract --> Extractor - extract --> ExtractDF - Extractor --> Ollama - Extractor --> JsonRepair - Extractor --> Cache - PIIDetector --> RegexDet - PIIDetector --> LLMDet --> Ollama - LLMDet --> Cache - Scrub --> FakeGen - NarwhalsEng --> Ollama - Factory --> Config - Factory --> Ollama - Ollama --> ABC -``` - ---- - -## File Hierarchy - -``` -loclean/ -├── pyproject.toml # Build config, deps, tool settings -├── src/loclean/ -│ ├── __init__.py # PUBLIC API: Loclean class, clean(), scrub(), extract(), get_engine() -│ ├── _version.py # __version__ = "0.2.2" -│ ├── cache.py # LocleanCache — SQLite3 persistent cache (WAL mode) -│ │ -│ ├── inference/ # ── Inference Engine Layer ── -│ │ ├── __init__.py # Re-exports: InferenceEngine, OllamaEngine, EngineConfig -│ │ ├── base.py # InferenceEngine ABC (generate, clean_batch) -│ │ ├── ollama_engine.py # OllamaEngine — Ollama HTTP client wrapper -│ │ ├── config.py # EngineConfig (Pydantic) + hierarchical config loader -│ │ ├── factory.py # create_engine() 
factory function -│ │ ├── schemas.py # ExtractionResult schema (reasoning/value/unit) -│ │ └── local/ # Reserved for future local engines -│ │ └── __init__.py -│ │ -│ ├── extraction/ # ── Structured Extraction Layer ── -│ │ ├── __init__.py # Re-exports: Extractor, extract_dataframe -│ │ ├── extractor.py # Extractor — prompt → generate → parse → validate → retry -│ │ ├── extract_dataframe.py # DataFrame column extraction (pandas/Polars) -│ │ └── json_repair.py # Heuristic JSON repair for malformed LLM output -│ │ -│ ├── privacy/ # ── PII Detection & Scrubbing Layer ── -│ │ ├── __init__.py # Re-exports: scrub_string, scrub_dataframe -│ │ ├── schemas.py # PIIEntity, PIIDetectionResult (Pydantic) -│ │ ├── detector.py # PIIDetector — hybrid router (regex + LLM) -│ │ ├── regex_detector.py # RegexDetector — email, phone, credit_card, ip_address -│ │ ├── llm_detector.py # LLMDetector — person, address (via engine.generate) -│ │ ├── scrub.py # scrub_string(), scrub_dataframe() + replace_entities() -│ │ └── generator.py # FakeDataGenerator (Faker) for "fake" mode -│ │ -│ ├── engine/ # ── DataFrame Processing Engine ── -│ │ └── narwhals_ops.py # NarwhalsEngine — batch processing, parallel, progress -│ │ -│ ├── cli/ # ── Command-Line Interface ── -│ │ ├── __init__.py # Typer app with "model" subgroup -│ │ ├── model.py # "model status" command -│ │ └── model_commands.py # check_connection() — Ollama connectivity check -│ │ -│ ├── utils/ # ── Utilities ── -│ │ ├── __init__.py -│ │ ├── logging.py # Rich-compatible module logger -│ │ ├── rich_output.py # Progress bars, tables, cache stats -│ │ └── resources.py # (Stub — grammar/template loaders removed) -│ │ -│ └── resources/ # ── Static Resources ── -│ └── __init__.py # (Empty — grammars/templates removed in migration) -│ -├── tests/ -│ ├── conftest.py # Shared fixtures -│ ├── unit/ # 318 tests — fast, isolated, mocked -│ │ ├── test_public_api.py # Loclean class + clean/scrub/extract functions -│ │ ├── test_cache.py # 
LocleanCache -│ │ ├── cli/ # CLI tests -│ │ │ ├── test_cli_init.py # App structure + routing -│ │ │ ├── test_model.py # Status command -│ │ │ └── test_model_commands.py # check_connection() -│ │ ├── inference/ # Inference tests -│ │ │ ├── test_base.py # ABC contract -│ │ │ ├── test_config.py # Config loading (env, pyproject, defaults) -│ │ │ ├── test_factory.py # Engine creation -│ │ │ └── test_schemas.py # ExtractionResult -│ │ ├── extraction/ # Extraction tests -│ │ │ ├── test_extractor.py # Extractor (37 tests) -│ │ │ ├── test_extract_dataframe.py # DataFrame extraction -│ │ │ └── test_json_repair.py # JSON repair -│ │ ├── privacy/ # Privacy tests -│ │ │ ├── test_detector.py # PIIDetector hybrid -│ │ │ ├── test_detector_functions.py # find_all_positions, resolve_overlaps -│ │ │ ├── test_llm_detector.py # LLMDetector (19 tests) -│ │ │ ├── test_regex_detector.py # RegexDetector -│ │ │ ├── test_schemas.py # PIIEntity, PIIDetectionResult -│ │ │ ├── test_scrub.py # scrub_string, scrub_dataframe -│ │ │ └── test_generator.py # FakeDataGenerator -│ │ ├── engine/ -│ │ │ └── test_narwhals_ops.py # NarwhalsEngine -│ │ └── utils/ -│ │ ├── test_logging.py -│ │ ├── test_rich_output.py -│ │ └── test_resources.py # (Stub) -│ ├── integration/ # Require live Ollama instance -│ │ ├── test_core.py -│ │ └── test_reasoning.py -│ └── scenarios/ # E2E + UX tests -│ ├── test_e2e_flows.py -│ ├── test_error_experience.py -│ └── test_ux_interface.py -│ -├── examples/ # Usage examples -├── docs-web/ # Documentation website -├── scripts/ # Build/CI scripts -├── .github/ # CI/CD workflows -└── .agent/workflows/ # Agent workflow definitions -``` - ---- - -## Core Components — Detailed Reference - -### 1. Public API (`__init__.py`) - -The module-level API is the primary entry point. All functions use a **singleton `OllamaEngine`** by default, or accept `model`/`host`/`verbose` overrides to create dedicated instances. 
- -| Symbol | Type | Purpose | -|--------|------|---------| -| `Loclean` | Class | OOP interface wrapping `OllamaEngine` + `Extractor` | -| `clean(df, col, instruction)` | Function | Semantic column cleaning → adds `clean_value`, `clean_unit`, `clean_reasoning` | -| `scrub(input, strategies, mode)` | Function | PII detection + masking/faking on text or DataFrame | -| `extract(input, schema)` | Function | Structured extraction via Pydantic schema | -| `get_engine()` | Function | Singleton `OllamaEngine` manager | - -**Key design**: `Loclean` class does lazy local imports (`Extractor`, `Scrub`, `BaseModel`) inside methods to keep import time fast. - ---- - -### 2. Inference Layer (`inference/`) - -#### `InferenceEngine` (ABC in `base.py`) -Two abstract methods every engine must implement: - -```python -class InferenceEngine(ABC): - @abstractmethod - def generate(self, prompt: str, schema: type[BaseModel] | None = None) -> str: ... - - @abstractmethod - def clean_batch(self, items: List[str], instruction: str) -> Dict[str, Optional[Dict[str, Any]]]: ... 
-``` - -#### `OllamaEngine` (`ollama_engine.py`) -- Connects to Ollama HTTP API via `ollama.Client(host=...)` -- Validates connection in `__init__` by calling `client.list()` -- `generate()` passes `schema.model_json_schema()` as the `format` kwarg to Ollama's `generate()` endpoint → Ollama constrains output to valid JSON -- `clean_batch()` iterates items, calls `generate()` with `ExtractionResult` schema, parses JSON - -#### `EngineConfig` (`config.py`) -Pydantic model with hierarchical config loading: - -``` -Priority: Runtime params > Env vars (LOCLEAN_*) > pyproject.toml [tool.loclean] > Defaults -``` - -| Field | Default | Env Var | -|-------|---------|---------| -| `engine` | `"ollama"` | `LOCLEAN_ENGINE` | -| `model` | `"phi3"` | `LOCLEAN_MODEL` | -| `host` | `"http://localhost:11434"` | `LOCLEAN_HOST` | -| `api_key` | `None` | `LOCLEAN_API_KEY` | -| `verbose` | `False` | `LOCLEAN_VERBOSE` | - -#### `create_engine()` (`factory.py`) -Factory that reads `EngineConfig.engine` and instantiates the correct backend. Only `"ollama"` is implemented; `"openai"`, `"anthropic"`, `"gemini"` raise `NotImplementedError`. - ---- - -### 3. Extraction Layer (`extraction/`) - -#### `Extractor` (`extractor.py`) -Core extraction class. Flow: - -``` -extract(text, schema, instruction?) - → _build_instruction(schema, instruction) - → check cache - → _extract_with_retry(text, schema, instruction, retry_count=0) - → build prompt: f"{instruction}\n\nInput: {text}" - → engine.generate(prompt, schema=schema) - → _parse_and_validate(raw_output, schema, ...) - → json.loads() or json_repair - → schema(**data) # Pydantic validation - → on failure → _retry_extraction (up to max_retries) - → cache result - → return validated BaseModel instance -``` - -Also has `extract_batch()` for processing lists with dedup + caching. - -#### `extract_dataframe()` (`extract_dataframe.py`) -Wraps `Extractor.extract_batch()` for DataFrame columns. 
Handles: -- Unique value deduplication -- Polars Struct columns vs pandas dicts -- `output_type="dict"` or `"pydantic"` - -#### `json_repair.py` -Heuristic JSON repair for truncated/malformed LLM output (bracket balancing, trailing comma removal). - ---- - -### 4. Privacy Layer (`privacy/`) - -#### Detection Architecture - -```mermaid -graph LR - PIIDetector --> RegexDetector - PIIDetector --> LLMDetector - RegexDetector -- "email, phone, credit_card, ip_address" --> PIIEntity - LLMDetector -- "person, address" --> PIIDetectionResult --> PIIEntity -``` - -**`PIIDetector`** (`detector.py`) is a hybrid router: -- **Regex strategies** (fast): `email`, `phone`, `credit_card`, `ip_address` → `RegexDetector` -- **LLM strategies** (accurate): `person`, `address` → `LLMDetector` → `engine.generate(prompt, PIIDetectionResult)` -- Merges results → `resolve_overlaps()` (longer match wins) - -#### Scrubbing - -**`scrub_string()`** and **`scrub_dataframe()`** in `scrub.py`: -- `mode="mask"` → replaces PII with `[TYPE]` (e.g., `[PERSON]`, `[EMAIL]`) -- `mode="fake"` → replaces with realistic fake data via `FakeDataGenerator` (requires `faker`, optional dep) - -#### Pydantic Schemas (`schemas.py`) - -```python -PIIType = Literal["person", "phone", "email", "credit_card", "address", "ip_address"] - -class PIIEntity(BaseModel): - type: PIIType - value: str - start: int - end: int - -class PIIDetectionResult(BaseModel): - entities: list[PIIEntity] - reasoning: str | None = None -``` - ---- - -### 5. DataFrame Engine (`engine/narwhals_ops.py`) - -**`NarwhalsEngine`** — static class for backend-agnostic batch processing: -- `process_column(df, col, engine, instruction, batch_size, parallel, max_workers)` -- Deduplicates unique values, chunks into batches -- Calls `engine.clean_batch()` per chunk -- Supports `ThreadPoolExecutor` parallel mode -- Rich progress bars via `utils/rich_output.py` -- Joins results back to original DataFrame via Narwhals - ---- - -### 6. 
Caching (`cache.py`) - -**`LocleanCache`** — SQLite3 persistent cache: -- Location: `~/.cache/loclean/cache.db` -- WAL mode for concurrent access -- Hash key: `SHA256("v3::{instruction}::{text}")` -- Used by both `Extractor` and `LLMDetector` -- Context manager support (`with LocleanCache() as cache`) - ---- - -### 7. CLI (`cli/`) - -Entry point: `loclean` (registered in `pyproject.toml` as script). - -``` -loclean -└── model - └── status [--host URL] # Check Ollama connection, list available models -``` - -`check_connection()` in `model_commands.py`: -- Connects via `ollama.Client(host=...)` (local import) -- Lists models in a Rich table -- Shows install instructions on failure - ---- - -## Dependencies - -### Core (required) -| Package | Purpose | -|---------|---------| -| `narwhals≥2.14.0` | Backend-agnostic DataFrame operations | -| `pydantic≥2.12.5` | Schema validation + JSON schema generation | -| `ollama≥0.4.0` | Ollama Python client | -| `json-repair≥0.27.0` | JSON repair for malformed LLM output | -| `typer≥0.12.0` | CLI framework | -| `rich≥14.0.0` | Terminal output formatting | - -### Optional extras -| Extra | Packages | Purpose | -|-------|----------|---------| -| `data` | pandas, polars, pyarrow | DataFrame backends | -| `cloud` | openai, anthropic, google-genai, instructor | Future cloud engines | -| `privacy` | faker | Fake data generation for PII replacement | -| `all` | All of the above | Everything | - ---- - -## Configuration - -### Hierarchical Priority -``` -1. Runtime params (model=, host=, verbose=) -2. Environment variables (LOCLEAN_ENGINE, LOCLEAN_MODEL, LOCLEAN_HOST, etc.) -3. pyproject.toml [tool.loclean] section -4. 
Hardcoded defaults (engine=ollama, model=phi3, host=localhost:11434) -``` - -### pyproject.toml example -```toml -[tool.loclean] -engine = "ollama" -model = "llama3" -host = "http://remote-server:11434" -verbose = true -``` - ---- - -## Development & Tooling - -### Setup -```bash -uv sync --all-extras --dev # Install all deps -ollama serve # Start Ollama externally -ollama pull phi3 # Pull default model -``` - -### PR Readiness Checklist -```bash -uv run ruff format . # Format -uv run ruff check . --fix # Lint -uv run mypy . # Type check -uv run python -m pytest # Test (318 unit tests) -``` - -### Test Configuration -- **Framework**: pytest + pytest-cov + pytest-mock -- **Config**: `pyproject.toml` `[tool.pytest.ini_options]` -- **Coverage**: Branch coverage, fail-under 50%, XML report -- **Markers**: `slow`, `cloud` - -### Key Design Rules (from user guidelines) -1. **Use `uv`** for all Python/pip operations -2. **Use Narwhals** for all DataFrame ops — never import pandas/polars in core logic -3. **Optional deps** wrapped in `try/except ImportError` -4. **Never commit to main** — use feature branches + PRs -5. 
**Atomic commits** — small, logical chunks - ---- - -## Data Flow Examples - -### `clean()` flow -``` -DataFrame → Narwhals wraps → deduplicate unique values → chunk into batches -→ OllamaEngine.clean_batch(items, instruction) per batch - → for each item: generate(prompt, schema=ExtractionResult) - → Ollama returns JSON: {"reasoning": "...", "value": 5.5, "unit": "kg"} -→ join results back to DataFrame → return native DataFrame -``` - -### `extract()` flow -``` -text + Pydantic schema → Extractor -→ build instruction from schema fields -→ check LocleanCache -→ engine.generate(prompt, schema=UserSchema) -→ Ollama returns constrained JSON -→ json.loads() → schema(**data) → Pydantic validates -→ on failure: json_repair → retry with adjusted prompt (up to 3x) -→ cache result → return validated BaseModel instance -``` - -### `scrub()` flow -``` -text + strategies=["person", "email", "phone"] -→ PIIDetector.detect(text, strategies) - → RegexDetector: email patterns, phone patterns - → LLMDetector: engine.generate(prompt, schema=PIIDetectionResult) - → merge + resolve_overlaps() -→ replace_entities(text, entities, mode="mask") -→ "Contact [PERSON] at [EMAIL] or [PHONE]" -``` diff --git a/examples/06-entity-resolution.ipynb b/examples/06-entity-resolution.ipynb new file mode 100644 index 0000000..8008a22 --- /dev/null +++ b/examples/06-entity-resolution.ipynb @@ -0,0 +1,129 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🔗 Entity Resolution\n", + "\n", + "Canonicalize messy string variations (typos, abbreviations, casing) under a single label using `loclean.resolve_entities`.\n", + "\n", + "**Use case:** You have a column of company names entered by different people — some wrote \"Google\", others \"google\", \"GOOGLE Inc.\", or \"Alphabet / Google\". Entity resolution merges them into one canonical form." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "\n", + "import loclean" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create messy data\n", + "\n", + "16 company name variations across 5 real companies:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pl.DataFrame(\n", + " {\n", + " \"company\": [\n", + " \"Google LLC\",\n", + " \"google\",\n", + " \"GOOGLE Inc.\",\n", + " \"Alphabet / Google\",\n", + " \"Microsoft Corp\",\n", + " \"microsoft\",\n", + " \"MSFT\",\n", + " \"Apple Inc.\",\n", + " \"apple\",\n", + " \"AAPL\",\n", + " \"Amazon.com Inc\",\n", + " \"amazon\",\n", + " \"AMZN\",\n", + " \"Meta Platforms\",\n", + " \"meta\",\n", + " \"Facebook (Meta)\",\n", + " ]\n", + " }\n", + ")\n", + "\n", + "print(f\"Unique values before: {df['company'].n_unique()}\")\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Resolve entities\n", + "\n", + "The `threshold` parameter controls how aggressively values are merged (0 = nothing, 1 = everything)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = loclean.resolve_entities(df, \"company\", threshold=0.8)\n", + "result" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compare before vs. 
after" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "unique_before = df[\"company\"].n_unique()\n", + "unique_after = result[\"company_canonical\"].n_unique()\n", + "merged = unique_before - unique_after\n", + "\n", + "print(f\"Unique values: {unique_before} → {unique_after} ({merged} merged)\")\n", + "result.select([\"company\", \"company_canonical\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/07-oversampling.ipynb b/examples/07-oversampling.ipynb new file mode 100644 index 0000000..27f4e41 --- /dev/null +++ b/examples/07-oversampling.ipynb @@ -0,0 +1,153 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ⚖️ Semantic Oversampling\n", + "\n", + "Generate synthetic minority-class records using `loclean.oversample`.\n", + "\n", + "**Use case:** Your dataset has 8 \"healthy\" patients and only 2 \"hypertension\" — the LLM generates semantically plausible synthetic hypertension records to balance the classes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "from pydantic import BaseModel, Field\n", + "\n", + "import loclean" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define schema and create imbalanced data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class PatientRecord(BaseModel):\n", + " \"\"\"Schema for synthetic patient records.\"\"\"\n", + "\n", + " age: int = Field(..., ge=0, le=120, description=\"Patient age\")\n", + " blood_pressure: str = Field(\n", + " ..., description=\"Blood pressure reading, e.g. '120/80'\"\n", + " )\n", + " cholesterol: str = Field(..., description=\"Cholesterol level: Low, Normal, or High\")\n", + " diagnosis: str = Field(..., description=\"Medical diagnosis label\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pl.DataFrame(\n", + " {\n", + " \"age\": [45, 52, 38, 61, 55, 42, 35, 67, 48, 50],\n", + " \"blood_pressure\": [\n", + " \"120/80\",\n", + " \"140/90\",\n", + " \"130/85\",\n", + " \"150/95\",\n", + " \"128/82\",\n", + " \"135/88\",\n", + " \"118/76\",\n", + " \"155/100\",\n", + " \"125/80\",\n", + " \"138/92\",\n", + " ],\n", + " \"cholesterol\": [\n", + " \"Normal\",\n", + " \"High\",\n", + " \"Normal\",\n", + " \"High\",\n", + " \"Normal\",\n", + " \"Normal\",\n", + " \"Low\",\n", + " \"High\",\n", + " \"Normal\",\n", + " \"Normal\",\n", + " ],\n", + " \"diagnosis\": [\n", + " \"healthy\",\n", + " \"healthy\",\n", + " \"healthy\",\n", + " \"hypertension\",\n", + " \"healthy\",\n", + " \"healthy\",\n", + " \"healthy\",\n", + " \"hypertension\",\n", + " \"healthy\",\n", + " \"healthy\",\n", + " ],\n", + " }\n", + ")\n", + "\n", + "print(\"Class distribution (before):\")\n", + "print(df[\"diagnosis\"].value_counts())\n", + "df" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "## Generate synthetic minority records" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = loclean.oversample(\n", + " df,\n", + " target_col=\"diagnosis\",\n", + " target_value=\"hypertension\",\n", + " n=6,\n", + " schema=PatientRecord,\n", + " batch_size=3,\n", + ")\n", + "\n", + "print(f\"Rows: {len(df)} → {len(result)} (+{len(result) - len(df)} synthetic)\")\n", + "print(\"\\nClass distribution (after):\")\n", + "print(result[\"diagnosis\"].value_counts())\n", + "result" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/08-log-shredding.ipynb b/examples/08-log-shredding.ipynb new file mode 100644 index 0000000..5bb8a81 --- /dev/null +++ b/examples/08-log-shredding.ipynb @@ -0,0 +1,127 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 📋 Log Shredding\n", + "\n", + "Parse unstructured logs into multiple relational tables using `loclean.shred_to_relations`.\n", + "\n", + "**Use case:** You have a single column of raw server logs — the LLM infers a relational schema (events, users, errors) and generates a parser that separates them into normalized DataFrames." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "\n", + "import loclean" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create raw log data\n", + "\n", + "5 realistic server log entries mixing auth, API, payment, inventory, and ML events:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pl.DataFrame(\n", + " {\n", + " \"log_entry\": [\n", + " (\n", + " \"2024-01-15 08:23:11 INFO [auth-service] \"\n", + " \"User john.doe@corp.com logged in from 192.168.1.42 \"\n", + " \"using Chrome/120.0 on Windows 11. \"\n", + " \"Session: sess_abc123. MFA: enabled.\"\n", + " ),\n", + " (\n", + " \"2024-01-15 08:24:05 WARN [api-gateway] \"\n", + " \"Rate limit approaching for client_id=clt_789 \"\n", + " \"(plan: enterprise, limit: 10000/min, \"\n", + " \"current: 8500/min). Endpoint: /v2/search.\"\n", + " ),\n", + " (\n", + " \"2024-01-15 08:25:30 ERROR [payment-svc] \"\n", + " \"Transaction tx_456def failed for user jane.smith \"\n", + " \"— amount: $149.99 USD, method: visa_*4242, \"\n", + " \"reason: insufficient_funds. 
Retry #2 of 3.\"\n", + " ),\n", + " (\n", + " \"2024-01-15 08:26:00 INFO [inventory] \"\n", + " \"Stock update: SKU=WDG-1001, warehouse=US-EAST-1, \"\n", + " \"qty_before=250, qty_after=248, \"\n", + " \"order_id=ORD-2024-5678.\"\n", + " ),\n", + " (\n", + " \"2024-01-15 08:27:45 DEBUG [ml-pipeline] \"\n", + " \"Model inference complete: model=fraud_v3.2, \"\n", + " \"latency_ms=42, input_features=128, \"\n", + " \"prediction=0.02, threshold=0.5, decision=ALLOW.\"\n", + " ),\n", + " ]\n", + " }\n", + ")\n", + "\n", + "for entry in df[\"log_entry\"].to_list():\n", + " print(entry)\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Shred into relational tables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tables = loclean.shred_to_relations(df, \"log_entry\", sample_size=5, max_retries=3)\n", + "\n", + "print(f\"Shredded 1 column → {len(tables)} relational tables\\n\")\n", + "\n", + "for name, tbl in tables.items():\n", + " print(f\"━━━ {name} ({len(tbl)} rows) ━━━\")\n", + " print(tbl)\n", + " print()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/09-feature-discovery.ipynb b/examples/09-feature-discovery.ipynb new file mode 100644 index 0000000..8a15807 --- /dev/null +++ b/examples/09-feature-discovery.ipynb @@ -0,0 +1,128 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🧬 Feature Discovery\n", + "\n", + "Automatically discover feature crosses using `loclean.discover_features`.\n", + "\n", + "**Use case:** Given a housing 
dataset, the LLM proposes mathematical transformations (e.g. `price_per_sqft = price / square_feet`) that maximise mutual information with the target variable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "\n", + "import loclean" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create housing dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pl.DataFrame(\n", + " {\n", + " \"square_feet\": [1200, 1800, 2400, 950, 3100, 1600, 2800, 1100, 2000, 1450],\n", + " \"bedrooms\": [2, 3, 4, 1, 5, 3, 4, 2, 3, 2],\n", + " \"bathrooms\": [1, 2, 3, 1, 3, 2, 3, 1, 2, 2],\n", + " \"year_built\": [1990, 2005, 2018, 1975, 2022, 2000, 2015, 1985, 2010, 1995],\n", + " \"lot_size_acres\": [0.15, 0.25, 0.40, 0.10, 0.60, 0.20, 0.35, 0.12, 0.30, 0.18],\n", + " \"price\": [\n", + " 250_000,\n", + " 380_000,\n", + " 520_000,\n", + " 180_000,\n", + " 720_000,\n", + " 310_000,\n", + " 480_000,\n", + " 220_000,\n", + " 400_000,\n", + " 280_000,\n", + " ],\n", + " }\n", + ")\n", + "\n", + "print(f\"Original columns: {df.columns}\")\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Discover new features\n", + "\n", + "The LLM analyses column types, sample values, and correlations to propose transformations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = loclean.discover_features(\n", + " df, \"price\", n_features=5, max_retries=5, model=\"qwen2.5-coder:1.5b\"\n", + ")\n", + "\n", + "new_cols = [c for c in result.columns if c not in df.columns]\n", + "print(f\"Discovered {len(new_cols)} new features: {new_cols}\")\n", + "print(f\"Shape: {df.shape} → {result.shape}\")\n", + "result" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inspect new features" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if new_cols:\n", + " result.select(new_cols).describe()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/10-quality-validation.ipynb b/examples/10-quality-validation.ipynb new file mode 100644 index 0000000..17e85f5 --- /dev/null +++ b/examples/10-quality-validation.ipynb @@ -0,0 +1,148 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ✅ Quality Validation\n", + "\n", + "Check data against natural-language rules using `loclean.validate_quality`.\n", + "\n", + "**Use case:** Instead of writing complex regex or SQL constraints, describe your data quality rules in plain English and let the LLM evaluate compliance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "\n", + "import loclean" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a dataset with quality issues\n", + "\n", + "Intentionally includes empty names, invalid emails, out-of-range ages, and negative salaries:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pl.DataFrame(\n", + " {\n", + " \"name\": [\n", + " \"Alice Johnson\",\n", + " \"Bob Smith\",\n", + " \"\",\n", + " \"Diana Prince\",\n", + " \"Eve Adams\",\n", + " None,\n", + " \"Grace Hopper\",\n", + " \"Hank Hill\",\n", + " \"Ivy Chen\",\n", + " \"Jack Black\",\n", + " ],\n", + " \"email\": [\n", + " \"alice@example.com\",\n", + " \"bob@corp.io\",\n", + " \"invalid-email\",\n", + " \"diana@example.com\",\n", + " \"eve@\",\n", + " \"frank@example.com\",\n", + " \"grace@navy.mil\",\n", + " \"hank@propane.com\",\n", + " \"ivychen.com\",\n", + " \"jack@rock.com\",\n", + " ],\n", + " \"age\": [28, 35, -5, 42, 150, 31, 85, 47, 23, 0],\n", + " \"salary\": [\n", + " 65_000,\n", + " 82_000,\n", + " 45_000,\n", + " 0,\n", + " 95_000,\n", + " 71_000,\n", + " 120_000,\n", + " 58_000,\n", + " -1_000,\n", + " 53_000,\n", + " ],\n", + " }\n", + ")\n", + "\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define quality rules in plain English" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rules = [\n", + " \"Name must not be empty or null\",\n", + " \"Email must contain exactly one '@' followed by a domain with a dot\",\n", + " \"Age must be between 1 and 120\",\n", + " \"Salary must be a positive number greater than zero\",\n", + "]\n", + "\n", + "for i, rule in enumerate(rules, 1):\n", + " print(f\" {i}. 
{rule}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run validation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "report = loclean.validate_quality(df, rules, batch_size=10, sample_size=10)\n", + "report" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/11-kaggle-housing-pipeline.ipynb b/examples/11-kaggle-housing-pipeline.ipynb new file mode 100644 index 0000000..22fb01f --- /dev/null +++ b/examples/11-kaggle-housing-pipeline.ipynb @@ -0,0 +1,352 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🏠 Kaggle Housing Price Prediction — Full loclean Workflow\n", + "\n", + "A realistic **data science** notebook showing how `loclean` accelerates the entire\n", + "data-preparation pipeline for a Kaggle-style regression task.\n", + "\n", + "**Pipeline:** Raw messy data → Clean → Entity Resolution → Feature Discovery → Quality Validation → Model-ready DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "\n", + "import loclean\n", + "\n", + "MODEL = \"qwen2.5-coder:1.5b\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1 · Raw Data — messy, real-world-like\n", + "\n", + "Simulates what you'd download from a Kaggle competition: inconsistent formatting,\n", + "mixed units, duplicate entity names, class imbalance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "raw = pl.DataFrame(\n", + " {\n", + " \"address\": [\n", + " \"123 Main St, Springfield, IL\",\n", + " \"456 Oak Ave, springfield, illinois\",\n", + " \"789 Pine Rd, Chicago, IL\",\n", + " \"321 Elm St, chicago, Illinois\",\n", + " \"654 Maple Dr, Naperville, IL\",\n", + " \"987 Cedar Ln, Naperville, IL\",\n", + " \"111 Birch Way, Joliet, IL\",\n", + " \"222 Walnut St, JOLIET, Illinois\",\n", + " \"333 Ash Ct, Peoria, IL\",\n", + " \"444 Spruce Pl, Rockford, IL\",\n", + " ],\n", + " \"city\": [\n", + " \"Springfield\",\n", + " \"springfield\",\n", + " \"Chicago\",\n", + " \"chicago\",\n", + " \"Naperville\",\n", + " \"Naperville\",\n", + " \"Joliet\",\n", + " \"JOLIET\",\n", + " \"Peoria\",\n", + " \"Rockford\",\n", + " ],\n", + " \"size_raw\": [\n", + " \"1,200 sqft\",\n", + " \"1800 sq ft\",\n", + " \"2400 square feet\",\n", + " \"950sqft\",\n", + " \"3,100 sqft\",\n", + " \"1600 sq. 
ft.\",\n", + " \"2800sqft\",\n", + " \"1100 sqft\",\n", + " \"2,000 sq ft\",\n", + " \"1450 sq ft\",\n", + " ],\n", + " \"bedrooms\": [2, 3, 4, 1, 5, 3, 4, 2, 3, 2],\n", + " \"bathrooms\": [1, 2, 3, 1, 3, 2, 3, 1, 2, 2],\n", + " \"year_built\": [1990, 2005, 2018, 1975, 2022, 2000, 2015, 1985, 2010, 1995],\n", + " \"lot_acres\": [0.15, 0.25, 0.40, 0.10, 0.60, 0.20, 0.35, 0.12, 0.30, 0.18],\n", + " \"is_luxury\": [\"no\", \"no\", \"yes\", \"no\", \"yes\", \"no\", \"yes\", \"no\", \"no\", \"no\"],\n", + " \"price\": [\n", + " 250_000,\n", + " 380_000,\n", + " 520_000,\n", + " 180_000,\n", + " 720_000,\n", + " 310_000,\n", + " 480_000,\n", + " 220_000,\n", + " 400_000,\n", + " 280_000,\n", + " ],\n", + " }\n", + ")\n", + "\n", + "print(f\"Shape: {raw.shape}\")\n", + "print(f\"Columns: {raw.columns}\")\n", + "raw" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2 · Data Cleaning — extract numeric values from messy strings\n", + "\n", + "The `size_raw` column has inconsistent formats: `\"1,200 sqft\"`, `\"2400 square feet\"`,\n", + "`\"950sqft\"`. `loclean.clean()` uses the LLM to extract the numeric value." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cleaned = loclean.clean(\n", + " raw,\n", + " \"size_raw\",\n", + " instruction=\"Extract the numeric square footage value only, as an integer.\",\n", + " model=MODEL,\n", + ")\n", + "\n", + "print(\"Before → After:\")\n", + "cleaned.select(\"size_raw\", \"clean_value\", \"clean_unit\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3 · Entity Resolution — canonicalize city names\n", + "\n", + "`\"Springfield\"` vs `\"springfield\"` vs `\"JOLIET\"` vs `\"Joliet\"` — the LLM groups\n", + "these into canonical forms automatically." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "resolved = loclean.resolve_entities(\n", + " cleaned,\n", + " \"city\",\n", + " threshold=0.8,\n", + " model=MODEL,\n", + ")\n", + "\n", + "print(\"Entity resolution results:\")\n", + "resolved.select(\"city\", \"city_canonical\").unique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4 · Feature Discovery — LLM-proposed feature engineering\n", + "\n", + "The LLM analyses column types and sample values, then proposes mathematical\n", + "transformations (e.g. `price_per_sqft`, `log_price`, `rooms_per_acre`) that\n", + "maximise mutual information with the target." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Use only numeric columns for feature discovery\n", + "numeric_df = resolved.select(\n", + " \"bedrooms\", \"bathrooms\", \"year_built\", \"lot_acres\", \"price\"\n", + ")\n", + "\n", + "enriched = loclean.discover_features(\n", + " numeric_df,\n", + " \"price\",\n", + " n_features=3,\n", + " max_retries=5,\n", + " model=MODEL,\n", + ")\n", + "\n", + "new_cols = [c for c in enriched.columns if c not in numeric_df.columns]\n", + "print(f\"Discovered {len(new_cols)} features: {new_cols}\")\n", + "enriched" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5 · Oversampling — handle class imbalance\n", + "\n", + "Only 3 of 10 houses are `\"luxury\"`. The LLM generates realistic synthetic\n", + "luxury records to balance the dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydantic import BaseModel\n", + "\n", + "\n", + "class HouseRecord(BaseModel):\n", + " bedrooms: int\n", + " bathrooms: int\n", + " year_built: int\n", + " lot_acres: float\n", + " price: int\n", + " is_luxury: str\n", + "\n", + "\n", + "luxury_before = raw.filter(pl.col(\"is_luxury\") == \"yes\").shape[0]\n", + "print(f\"Luxury houses before: {luxury_before} / {raw.shape[0]}\")\n", + "\n", + "oversampled = loclean.oversample(\n", + " raw.select(\n", + " \"bedrooms\", \"bathrooms\", \"year_built\", \"lot_acres\", \"price\", \"is_luxury\"\n", + " ),\n", + " target_col=\"is_luxury\",\n", + " target_value=\"yes\",\n", + " n=4,\n", + " schema=HouseRecord,\n", + " model=MODEL,\n", + ")\n", + "\n", + "luxury_after = oversampled.filter(pl.col(\"is_luxury\") == \"yes\").shape[0]\n", + "print(f\"Luxury houses after: {luxury_after} / {oversampled.shape[0]}\")\n", + "oversampled.tail(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6 · Quality Validation — LLM-powered data auditing\n", + "\n", + "Define constraints in plain English. The LLM evaluates each row and reports\n", + "compliance with reasoning for failures." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "report = loclean.validate_quality(\n", + " raw,\n", + " rules=[\n", + " \"Price must be a positive number greater than 50,000\",\n", + " \"Bedrooms must be between 1 and 10\",\n", + " \"Year built must be between 1800 and 2025\",\n", + " ],\n", + " sample_size=10,\n", + " model=MODEL,\n", + ")\n", + "\n", + "print(f\"Compliance rate: {report['compliance_rate']:.1%}\")\n", + "print(f\"Rows checked: {report['rows_checked']}\")\n", + "\n", + "if report.get(\"failures\"):\n", + " print(f\"\\nFailures ({len(report['failures'])}):\\n\")\n", + " for f in report[\"failures\"][:3]:\n", + " print(f\" Row {f['row_index']}: {f['rule']}\")\n", + " print(f\" Reason: {f['reason']}\\n\")\n", + "else:\n", + " print(\"\\n✅ All rows pass quality validation!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7 · Privacy Scrubbing — redact PII before sharing\n", + "\n", + "Before sharing the dataset (e.g. uploading to Kaggle), scrub any PII." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "original_addresses = raw.select(\"address\").to_series().to_list()\n", + "\n", + "scrubbed = loclean.scrub(\n", + " raw,\n", + " target_col=\"address\",\n", + " mode=\"mask\",\n", + " model=MODEL,\n", + ")\n", + "\n", + "scrubbed_addresses = scrubbed.select(\"address\").to_series().to_list()\n", + "\n", + "print(\"Original → Scrubbed:\")\n", + "for orig, masked in zip(original_addresses[:5], scrubbed_addresses[:5], strict=True):\n", + " print(f\" {orig}\")\n", + " print(f\" → {masked}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "| Step | API | What it does |\n", + "|------|-----|-------------|\n", + "| Clean | `loclean.clean()` | Extract numeric values from messy strings |\n", + "| Entity Resolution | `loclean.resolve_entities()` | Canonicalize city names |\n", + "| Feature Discovery | `loclean.discover_features()` | LLM-proposed feature engineering |\n", + "| Oversampling | `loclean.oversample()` | Generate synthetic minority records |\n", + "| Quality Validation | `loclean.validate_quality()` | Data quality audit in plain English |\n", + "| Privacy Scrubbing | `loclean.scrub()` | Redact PII before sharing |" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/12-log-engineering-pipeline.ipynb b/examples/12-log-engineering-pipeline.ipynb new file mode 100644 index 0000000..861ff69 --- /dev/null +++ b/examples/12-log-engineering-pipeline.ipynb @@ -0,0 +1,395 @@ +{ + "cells": [ + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "# 🔧 Data Engineering Pipeline — Log Processing with loclean\n", + "\n", + "A production-oriented **data engineering** notebook showing how `loclean` handles\n", + "unstructured log data: parsing, shredding into relational tables, structured\n", + "extraction, and quality validation.\n", + "\n", + "**Pipeline:** Raw logs → Structured Extraction → Log Shredding → Quality Gates → Clean relational tables\n", + "\n", + "> **Model:** `qwen2.5-coder:1.5b` (lightweight, code-specialised).\n", + "> Swap to `qwen2.5-coder:7b` for complex log formats." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "\n", + "import loclean\n", + "\n", + "MODEL = \"qwen2.5-coder:1.5b\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1 · Raw Log Data\n", + "\n", + "Simulates a mixed-format log ingestion: web server access logs with embedded\n", + "user agents, IPs, status codes, and response times." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "logs = pl.DataFrame(\n", + " {\n", + " \"log_entry\": [\n", + " (\n", + " \"192.168.1.10 - admin \"\n", + " \"[2024-01-15 08:23:45] \"\n", + " '\"GET /api/users HTTP/1.1\" '\n", + " '200 1523 0.045s \"Mozilla/5.0\"'\n", + " ),\n", + " (\n", + " \"10.0.0.55 - jsmith \"\n", + " \"[2024-01-15 08:24:01] \"\n", + " '\"POST /api/orders HTTP/1.1\" '\n", + " '201 892 0.123s \"curl/7.68\"'\n", + " ),\n", + " (\n", + " \"172.16.0.99 - - \"\n", + " \"[2024-01-15 08:24:15] \"\n", + " '\"GET /health HTTP/1.1\" '\n", + " '200 23 0.002s \"kube-probe/1.28\"'\n", + " ),\n", + " (\n", + " \"192.168.1.10 - admin \"\n", + " \"[2024-01-15 08:25:30] \"\n", + " '\"DELETE /api/users/42 HTTP/1.1\" '\n", + " '403 156 0.015s \"Mozilla/5.0\"'\n", + " ),\n", + " (\n", + " \"10.0.0.55 - jsmith \"\n", + " \"[2024-01-15 08:26:00] \"\n", + " '\"PUT /api/orders/100 HTTP/1.1\" '\n", + " '200 445 0.089s \"curl/7.68\"'\n", + " ),\n", + " (\n", + " \"192.168.2.1 - ops \"\n", + " \"[2024-01-15 08:27:10] \"\n", + " '\"GET /metrics HTTP/1.1\" '\n", + " '200 8921 0.234s \"Prometheus/2.45\"'\n", + " ),\n", + " (\n", + " \"10.0.0.55 - jsmith \"\n", + " \"[2024-01-15 08:28:00] \"\n", + " '\"POST /api/orders HTTP/1.1\" '\n", + " '500 234 2.105s \"curl/7.68\"'\n", + " ),\n", + " (\n", + " \"172.16.0.99 - - \"\n", + " \"[2024-01-15 08:29:00] \"\n", + " '\"GET /health HTTP/1.1\" '\n", + " '200 23 0.001s \"kube-probe/1.28\"'\n", + " ),\n", + " (\n", + " \"192.168.1.10 - admin \"\n", + " \"[2024-01-15 08:30:15] \"\n", + " '\"GET /api/users?page=2 HTTP/1.1\" '\n", + " '200 3201 0.067s \"Mozilla/5.0\"'\n", + " ),\n", + " (\n", + " \"10.0.0.88 - deploy \"\n", + " \"[2024-01-15 08:31:00] \"\n", + " '\"POST /api/deploy HTTP/1.1\" '\n", + " '202 567 1.456s \"Jenkins/2.401\"'\n", + " ),\n", + " ]\n", + " }\n", + ")\n", + "\n", + "print(f\"Sample log entries: {logs.shape}\")\n", + "logs.head(3)" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2 · Structured Extraction — parse fields from logs\n", + "\n", + "Use `loclean.extract()` with a Pydantic schema to parse structured fields\n", + "from each log line. The LLM handles format variations automatically." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydantic import BaseModel\n", + "\n", + "\n", + "class AccessLog(BaseModel):\n", + " ip: str\n", + " user: str\n", + " timestamp: str\n", + " method: str\n", + " path: str\n", + " status_code: int\n", + " response_bytes: int\n", + " response_time_s: float\n", + " user_agent: str\n", + "\n", + "\n", + "parsed = loclean.extract(\n", + " logs,\n", + " AccessLog,\n", + " target_col=\"log_entry\",\n", + " output_type=\"dataframe\",\n", + " model=MODEL,\n", + ")\n", + "\n", + "print(f\"Parsed {parsed.shape[0]} entries into {parsed.shape[1]} columns\")\n", + "parsed.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3 · Compiled Extraction — high-performance parsing\n", + "\n", + "`extract_compiled()` generates a native Python function (no LLM at runtime),\n", + "for 100x faster parsing on large datasets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "compiled_result = loclean.extract_compiled(\n", + " logs,\n", + " \"log_entry\",\n", + " AccessLog,\n", + " instruction=\"Parse the access log entry into structured fields.\",\n", + " max_retries=5,\n", + " model=MODEL,\n", + ")\n", + "\n", + "print(f\"Compiled extraction: {compiled_result.shape}\")\n", + "compiled_result.head(3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4 · Log Shredding — relational table decomposition\n", + "\n", + "The LLM infers a relational schema and generates a parser to split\n", + "log entries into normalised tables (e.g. `requests`, `users`). 
This is\n", + "useful for loading into a data warehouse." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tables = loclean.shred_to_relations(\n", + " logs,\n", + " \"log_entry\",\n", + " sample_size=10,\n", + " max_retries=5,\n", + " model=MODEL,\n", + ")\n", + "\n", + "print(f\"Shredded into {len(tables)} tables:\\n\")\n", + "for name, df in tables.items():\n", + " print(f\" 📋 {name}: {df.shape}\")\n", + " print(f\" Columns: {list(df.columns)}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Inspect each shredded table\n", + "for name, df in tables.items():\n", + " print(f\"\\n{'=' * 60}\")\n", + " print(f\"Table: {name}\")\n", + " print(f\"{'=' * 60}\")\n", + " print(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5 · Entity Resolution — canonicalize user identifiers\n", + "\n", + "User fields like `\"admin\"`, `\"jsmith\"`, `\"-\"` (anonymous) need normalisation.\n", + "This helps build consistent user activity tables." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "user_df = pl.DataFrame(\n", + " {\n", + " \"user_raw\": [\n", + " \"admin\",\n", + " \"Admin\",\n", + " \"ADMIN\",\n", + " \"administrator\",\n", + " \"jsmith\",\n", + " \"j.smith\",\n", + " \"john.smith\",\n", + " \"ops\",\n", + " \"operations\",\n", + " \"deploy\",\n", + " \"deployer\",\n", + " \"-\",\n", + " \"anonymous\",\n", + " \"(none)\",\n", + " ]\n", + " }\n", + ")\n", + "\n", + "resolved = loclean.resolve_entities(\n", + " user_df,\n", + " \"user_raw\",\n", + " threshold=0.7,\n", + " model=MODEL,\n", + ")\n", + "\n", + "print(\"User entity resolution:\")\n", + "resolved" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6 · Quality Gates — validate processed data\n", + "\n", + "Before loading into the warehouse, validate the data against\n", + "business rules defined in plain English." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "quality = loclean.validate_quality(\n", + " logs,\n", + " rules=[\n", + " \"Each log entry must contain an IP address\",\n", + " (\"HTTP status codes must be 3-digit numbers between 100 and 599\"),\n", + " (\"Timestamps must follow ISO-8601 or common datetime format\"),\n", + " ],\n", + " sample_size=10,\n", + " model=MODEL,\n", + ")\n", + "\n", + "rate = quality[\"compliance_rate\"]\n", + "status = \"✅ PASS\" if rate >= 0.95 else \"❌ FAIL\"\n", + "print(f\"Quality gate result: {status}\")\n", + "print(f\"Compliance: {rate:.0%}\")\n", + "\n", + "if quality[\"failures\"]:\n", + " print(f\"\\nTop failures ({len(quality['failures'])}):\\n\")\n", + " for f in quality[\"failures\"][:5]:\n", + " idx = f.get(\"row_index\", \"?\")\n", + " rule = f.get(\"rule\", \"\")\n", + " reason = f.get(\"reason\", \"\")\n", + " print(f\" Row {idx}: {rule} → {reason}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ 
+ "## 7 · PII Scrubbing — before data lake ingestion\n", + "\n", + "Scrub IP addresses and usernames before storing in the data lake." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "original_logs = logs.select(\"log_entry\").to_series().to_list()\n", + "\n", + "scrubbed = loclean.scrub(\n", + " logs,\n", + " target_col=\"log_entry\",\n", + " mode=\"mask\",\n", + " model=MODEL,\n", + ")\n", + "\n", + "scrubbed_logs = scrubbed.select(\"log_entry\").to_series().to_list()\n", + "\n", + "print(\"Scrubbed log entries (PII masked):\")\n", + "for orig, masked in zip(original_logs[:3], scrubbed_logs[:3], strict=True):\n", + " print(f\" Before: {orig}\")\n", + " print(f\" After: {masked}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "| Step | API | Use Case |\n", + "|------|-----|----------|\n", + "| Structured Extraction | `loclean.extract()` | Parse log fields into columns |\n", + "| Compiled Extraction | `loclean.extract_compiled()` | High-perf native parsing |\n", + "| Log Shredding | `loclean.shred_to_relations()` | Split into normalised tables |\n", + "| Entity Resolution | `loclean.resolve_entities()` | Canonicalize user IDs |\n", + "| Quality Gates | `loclean.validate_quality()` | Pre-load data validation |\n", + "| PII Scrubbing | `loclean.scrub()` | Mask PII before lake ingestion |" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/13-trap-pruning.ipynb b/examples/13-trap-pruning.ipynb new file mode 100644 index 0000000..2fffe46 --- /dev/null +++ 
b/examples/13-trap-pruning.ipynb @@ -0,0 +1,125 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🪤 Trap Feature Pruning\n", + "\n", + "Trap features are columns of random noise that masquerade as valid signals.\n", + "They inflate model complexity and hurt generalisation. `prune_traps()` uses\n", + "statistical profiling and LLM verification to detect and remove them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import polars as pl\n", + "\n", + "import loclean" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create dataset with hidden traps\n", + "\n", + "We build a small housing dataset with two **real** features (`square_feet`,\n", + "`bedrooms`) and two **trap** columns (`noise_a`, `noise_b`) — pure Gaussian\n", + "noise that has zero predictive value." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rng = np.random.default_rng(42)\n", + "\n", + "n = 20\n", + "sqft = rng.integers(800, 3000, size=n)\n", + "beds = rng.integers(1, 6, size=n)\n", + "price = sqft * 150 + beds * 10_000 + rng.normal(0, 5000, size=n)\n", + "\n", + "df = pl.DataFrame(\n", + " {\n", + " \"square_feet\": sqft,\n", + " \"bedrooms\": beds,\n", + " \"noise_a\": rng.standard_normal(n).round(4),\n", + " \"noise_b\": rng.standard_normal(n).round(4),\n", + " \"price\": price.astype(int),\n", + " }\n", + ")\n", + "\n", + "print(f\"Columns before: {df.columns}\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prune trap features\n", + "\n", + "The pruner profiles each numeric column's distribution and correlation with\n", + "the target, then asks the LLM to confirm whether flagged columns look like\n", + "injected noise." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pruned, summary = loclean.prune_traps(\n", + " df,\n", + " target_col=\"price\",\n", + " correlation_threshold=0.05,\n", + ")\n", + "\n", + "print(f\"Columns after: {pruned.columns}\")\n", + "print(f\"Dropped: {summary['dropped_columns']}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inspect verdicts\n", + "\n", + "The summary includes per-column verdicts with the LLM's reasoning." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for v in summary.get(\"verdicts\", []):\n", + " status = \"🪤 TRAP\" if v[\"is_trap\"] else \"✅ KEEP\"\n", + " print(f\"{status} {v['column']}: {v['reason']}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/14-missingness-recognition.ipynb b/examples/14-missingness-recognition.ipynb new file mode 100644 index 0000000..f05a9bb --- /dev/null +++ b/examples/14-missingness-recognition.ipynb @@ -0,0 +1,153 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🔍 Missingness Recognition\n", + "\n", + "Not all missing data is random. `recognize_missingness()` detects **Missing\n", + "Not At Random (MNAR)** patterns — where the *reason* for missingness\n", + "contains signal — and encodes them as boolean feature flags." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "\n", + "import loclean" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create dataset with MNAR pattern\n", + "\n", + "Imagine a clinical trial where **income** is missing *because* the patient\n", + "is unemployed. The missingness itself carries information that a simple\n", + "imputation would destroy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pl.DataFrame(\n", + " {\n", + " \"patient_id\": list(range(1, 11)),\n", + " \"age\": [28, 45, 62, 33, 51, 29, 70, 38, 55, 41],\n", + " \"employment\": [\n", + " \"employed\",\n", + " \"employed\",\n", + " \"retired\",\n", + " \"unemployed\",\n", + " \"employed\",\n", + " \"unemployed\",\n", + " \"retired\",\n", + " \"employed\",\n", + " \"employed\",\n", + " \"unemployed\",\n", + " ],\n", + " \"income\": [\n", + " 55000,\n", + " 82000,\n", + " None,\n", + " None,\n", + " 91000,\n", + " None,\n", + " None,\n", + " 67000,\n", + " 73000,\n", + " None,\n", + " ],\n", + " \"diagnosis\": [\n", + " \"healthy\",\n", + " \"diabetes\",\n", + " \"healthy\",\n", + " \"diabetes\",\n", + " \"healthy\",\n", + " \"diabetes\",\n", + " \"healthy\",\n", + " \"healthy\",\n", + " \"diabetes\",\n", + " \"diabetes\",\n", + " ],\n", + " }\n", + ")\n", + "\n", + "print(f\"Null counts: {df.null_count().to_dicts()[0]}\")\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Detect MNAR patterns\n", + "\n", + "The recogniser samples null vs. non-null rows, asks the LLM to explain\n", + "**why** missingness occurs, then compiles a boolean encoder if the pattern\n", + "is MNAR." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "augmented, summary = loclean.recognize_missingness(df)\n", + "\n", + "print(f\"Original columns: {df.columns}\")\n", + "print(f\"Augmented columns: {augmented.columns}\")\n", + "augmented" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inspect pattern summaries\n", + "\n", + "The summary maps each analysed column to the LLM's explanation of the\n", + "missingness pattern." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for col, info in summary.items():\n", + " print(f\"Column: {col}\")\n", + " if isinstance(info, dict):\n", + " for k, v in info.items():\n", + " print(f\" {k}: {v}\")\n", + " else:\n", + " print(f\" {info}\")\n", + " print()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/15-leakage-auditing.ipynb b/examples/15-leakage-auditing.ipynb new file mode 100644 index 0000000..51c61bc --- /dev/null +++ b/examples/15-leakage-auditing.ipynb @@ -0,0 +1,175 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🕵️ Target Leakage Auditing\n", + "\n", + "Target leakage occurs when features contain information from *after* the\n", + "prediction event, making models look accurate during training but fail in\n", + "production. `audit_leakage()` uses semantic timeline evaluation to find\n", + "and remove leaked columns." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "\n", + "import loclean" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create dataset with leakage\n", + "\n", + "A loan approval dataset where `approval_date` and `loan_officer_notes`\n", + "are generated **after** the approval decision — classic leakage that\n", + "wouldn't be available at prediction time." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pl.DataFrame(\n", + " {\n", + " \"applicant_age\": [28, 45, 35, 52, 31, 40, 55, 29, 48, 37],\n", + " \"annual_income\": [\n", + " 45000,\n", + " 92000,\n", + " 67000,\n", + " 115000,\n", + " 38000,\n", + " 78000,\n", + " 98000,\n", + " 42000,\n", + " 85000,\n", + " 61000,\n", + " ],\n", + " \"credit_score\": [\n", + " 680,\n", + " 750,\n", + " 710,\n", + " 800,\n", + " 620,\n", + " 730,\n", + " 770,\n", + " 650,\n", + " 740,\n", + " 700,\n", + " ],\n", + " \"debt_to_income\": [\n", + " 0.35,\n", + " 0.22,\n", + " 0.28,\n", + " 0.15,\n", + " 0.42,\n", + " 0.25,\n", + " 0.18,\n", + " 0.38,\n", + " 0.20,\n", + " 0.30,\n", + " ],\n", + " \"approval_date\": [\n", + " \"2024-03-15\",\n", + " \"2024-03-16\",\n", + " \"2024-03-17\",\n", + " \"2024-03-18\",\n", + " None,\n", + " \"2024-03-20\",\n", + " \"2024-03-21\",\n", + " None,\n", + " \"2024-03-23\",\n", + " \"2024-03-24\",\n", + " ],\n", + " \"loan_officer_notes\": [\n", + " \"Approved — good DTI\",\n", + " \"Approved — excellent credit\",\n", + " \"Approved — stable income\",\n", + " \"Approved — premium applicant\",\n", + " \"Denied — high risk\",\n", + " \"Approved — meets criteria\",\n", + " \"Approved — senior applicant\",\n", + " \"Denied — insufficient income\",\n", + " \"Approved — good history\",\n", + " \"Approved — standard case\",\n", + " ],\n", + " \"approved\": [1, 1, 1, 1, 0, 1, 1, 0, 1, 1],\n", + " }\n", + 
")\n", + "\n", + "print(f\"Columns: {df.columns}\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Audit for leakage\n", + "\n", + "The auditor evaluates whether each feature could have been known\n", + "**before** the target event. Provide a `domain` hint for better accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pruned, summary = loclean.audit_leakage(\n", + " df,\n", + " target_col=\"approved\",\n", + " domain=\"Loan approval prediction\",\n", + ")\n", + "\n", + "print(f\"Columns before: {df.columns}\")\n", + "print(f\"Columns after: {pruned.columns}\")\n", + "print(f\"Dropped: {summary['dropped_columns']}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inspect verdicts\n", + "\n", + "Each column gets a verdict with the LLM's timeline reasoning." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for v in summary.get(\"verdicts\", []):\n", + " status = \"🚨 LEAK\" if v[\"is_leakage\"] else \"✅ SAFE\"\n", + " print(f\"{status} {v['column']}: {v['reason']}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/16-instruction-optimization.ipynb b/examples/16-instruction-optimization.ipynb new file mode 100644 index 0000000..cc148cd --- /dev/null +++ b/examples/16-instruction-optimization.ipynb @@ -0,0 +1,135 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🎯 Instruction Optimization\n", + "\n", + "`optimize_instruction()` uses a reward-driven feedback loop to find the\n", + "best extraction prompt. 
It generates structural variations, evaluates each\n", + "against a validation sample, and returns the instruction with the highest\n", + "field-level F1 score." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "from pydantic import BaseModel, Field\n", + "\n", + "import loclean" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define schema and data\n", + "\n", + "We want to extract product information from messy free-text descriptions.\n", + "The schema defines the target structure." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class ProductInfo(BaseModel):\n", + " name: str = Field(..., description=\"Product name\")\n", + " weight_kg: float = Field(..., description=\"Weight in kilograms\")\n", + " price_usd: float = Field(..., description=\"Price in US dollars\")\n", + "\n", + "\n", + "df = pl.DataFrame(\n", + " {\n", + " \"description\": [\n", + " \"Widget A — 2.5kg, $19.99\",\n", + " \"Gadget B: weight 500g price $45\",\n", + " \"Thingamajig, approx 1.2 kilos, 29.50 USD\",\n", + " \"Widget-A (2500 grams) $19.99\",\n", + " \"gadget-b ~0.5 kg $45.00\",\n", + " \"Super Deluxe Widget 3.0kg @ $89.95\",\n", + " \"Mini Gadget C, 200g, fifteen dollars\",\n", + " \"Pro Widget X — 4.2 kg — USD 129\",\n", + " ]\n", + " }\n", + ")\n", + "\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Optimize the extraction instruction\n", + "\n", + "Start from a naive baseline and let the optimizer refine it. The optimizer\n", + "samples validation rows, generates prompt variations, scores each against\n", + "the schema, and returns the winner." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_instruction = loclean.optimize_instruction(\n", + " df,\n", + " target_col=\"description\",\n", + " schema=ProductInfo,\n", + " baseline_instruction=\"Extract product info.\",\n", + " sample_size=6,\n", + ")\n", + "\n", + "print(\"Best instruction found:\")\n", + "print(best_instruction)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use the optimized instruction\n", + "\n", + "Pass the optimized instruction back to `extract()` for production use." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = loclean.extract(\n", + " df[\"description\"][0],\n", + " ProductInfo,\n", + " instruction=best_instruction,\n", + ")\n", + "\n", + "print(f\"Input: {df['description'][0]}\")\n", + "print(f\"Output: {result}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/README.md b/examples/README.md index bef3663..88a4a16 100644 --- a/examples/README.md +++ b/examples/README.md @@ -98,16 +98,87 @@ Debugging and detailed logging: - Debugging Pydantic validation issues - Global configuration via environment variables +### 6. [06-entity-resolution.ipynb](./06-entity-resolution.ipynb) +Entity resolution — canonicalize messy string variations: +- Merge company-name typos, abbreviations, casing +- Configurable similarity threshold +- Before/after comparison + +### 7. [07-oversampling.ipynb](./07-oversampling.ipynb) +Semantic oversampling for imbalanced datasets: +- Pydantic-schema-driven synthetic record generation +- Minority-class augmentation +- Class distribution balancing + +### 8. 
[08-log-shredding.ipynb](./08-log-shredding.ipynb) +Log shredding — parse unstructured logs into relational tables: +- Mixed log format parsing (auth, API, payment, inventory, ML) +- Automatic schema inference +- One column → multiple normalized DataFrames + +### 9. [09-feature-discovery.ipynb](./09-feature-discovery.ipynb) +Automated feature discovery: +- LLM-proposed mathematical transformations +- Housing price dataset example +- Mutual information maximisation with target variable + +### 10. [10-quality-validation.ipynb](./10-quality-validation.ipynb) +Data quality validation with natural-language rules: +- Plain-English constraint definitions +- Structured compliance reports +- Multi-rule evaluation + +### 11. [11-kaggle-housing-pipeline.ipynb](./11-kaggle-housing-pipeline.ipynb) +🏠 **Data Science** — Kaggle-style housing prediction workflow: +- Clean messy strings → entity resolution → feature discovery +- Minority-class oversampling → quality validation → PII scrubbing +- Full pipeline with `qwen2.5-coder:1.5b` + +### 12. [12-log-engineering-pipeline.ipynb](./12-log-engineering-pipeline.ipynb) +🔧 **Data Engineering** — log processing and warehouse loading: +- Structured extraction with Pydantic schemas +- Compiled extraction for high-performance parsing +- Log shredding into relational tables → quality gates → PII masking + +### 13. [13-trap-pruning.ipynb](./13-trap-pruning.ipynb) +Trap feature detection and removal: +- Statistical profiling of numeric columns +- LLM-verified Gaussian noise detection +- Before/after column comparison with verdicts + +### 14. [14-missingness-recognition.ipynb](./14-missingness-recognition.ipynb) +Missing Not At Random (MNAR) pattern detection: +- Detect informative missingness patterns +- Automatic boolean feature flag encoding +- Clinical dataset example (income ↔ employment) + +### 15. 
[15-leakage-auditing.ipynb](./15-leakage-auditing.ipynb) +Target leakage detection and removal: +- Semantic timeline evaluation per column +- Domain-aware reasoning (loan approval example) +- Automatic removal of leaked features + +### 16. [16-instruction-optimization.ipynb](./16-instruction-optimization.ipynb) +Reward-driven prompt optimization: +- Generates structural instruction variations +- Scores each against validation sample (field-level F1) +- Returns the best-performing extraction instruction + +## Standalone Scripts + +| Script | Description | +|--------|-------------| +| [`benchmark.py`](./benchmark.py) | Performance benchmark: vectorized dedup + cache speedup on 100K rows | +| [`eval_demo.py`](./eval_demo.py) | Evaluation framework demo with optional Langfuse tracking | + ## Directory Structure This directory contains: -- **`*.ipynb`**: Jupyter notebook files demonstrating specific features. Numbered prefixes indicate recommended reading order. -- **`benchmark.py`**: Performance benchmarking script for comparing different models and configurations. Run with: - ```bash - python examples/benchmark.py - ``` -- **`README.md`**: This file - documentation and guidelines for examples. +- **`*.ipynb`**: Jupyter notebooks demonstrating specific features. Numbered prefixes indicate recommended reading order. +- **`benchmark.py`**: Performance benchmarking script. +- **`eval_demo.py`**: Evaluation framework demo. +- **`README.md`**: This file. 
## Requirements diff --git a/src/loclean/__init__.py b/src/loclean/__init__.py index 96d2222..e7d0515 100644 --- a/src/loclean/__init__.py +++ b/src/loclean/__init__.py @@ -10,6 +10,7 @@ __all__ = [ "__version__", "Loclean", + "audit_leakage", "clean", "discover_features", "extract", @@ -17,6 +18,8 @@ "get_engine", "optimize_instruction", "oversample", + "prune_traps", + "recognize_missingness", "resolve_entities", "scrub", "shred_to_relations", @@ -321,6 +324,96 @@ def discover_features( ) return discoverer.discover(df, target_col) + def prune_traps( + self, + df: IntoFrameT, + target_col: str, + *, + correlation_threshold: float = 0.05, + max_retries: int = 2, + ) -> tuple[IntoFrameT, dict[str, Any]]: + """Identify and remove trap features. + + Trap features are columns of uncorrelated Gaussian noise + that masquerade as valid signals. + + Args: + df: Input DataFrame. + target_col: Target variable column. + correlation_threshold: Absolute correlation below which + a column is considered uncorrelated. + max_retries: LLM retry budget. + + Returns: + Tuple of (pruned DataFrame, summary dict). + """ + from loclean.extraction.trap_pruner import TrapPruner + + pruner = TrapPruner( + inference_engine=self.engine, + correlation_threshold=correlation_threshold, + max_retries=max_retries, + ) + return pruner.prune(df, target_col) + + def recognize_missingness( + self, + df: IntoFrameT, + target_cols: list[str] | None = None, + *, + sample_size: int = 50, + max_retries: int = 3, + ) -> tuple[IntoFrameT, dict[str, Any]]: + """Detect MNAR patterns and encode as boolean features. + + Args: + df: Input DataFrame. + target_cols: Columns to analyse (default: all with nulls). + sample_size: Max null rows to sample per column. + max_retries: LLM retry budget. + + Returns: + Tuple of (augmented DataFrame, summary dict). 
+ """ + from loclean.extraction.missingness_recognizer import MissingnessRecognizer + + recognizer = MissingnessRecognizer( + inference_engine=self.engine, + sample_size=sample_size, + max_retries=max_retries, + ) + return recognizer.recognize(df, target_cols) + + def audit_leakage( + self, + df: IntoFrameT, + target_col: str, + domain: str = "", + *, + max_retries: int = 2, + sample_n: int = 10, + ) -> tuple[IntoFrameT, dict[str, Any]]: + """Detect and remove target-leaking features. + + Args: + df: Input DataFrame. + target_col: Target variable column. + domain: Dataset domain description. + max_retries: LLM retry budget. + sample_n: Sample rows for the prompt. + + Returns: + Tuple of (pruned DataFrame, summary dict). + """ + from loclean.extraction.leakage_auditor import TargetLeakageAuditor + + auditor = TargetLeakageAuditor( + inference_engine=self.engine, + max_retries=max_retries, + sample_n=sample_n, + ) + return auditor.audit(df, target_col, domain) + def validate_quality( self, df: IntoFrameT, @@ -853,3 +946,133 @@ def discover_features( max_retries=max_retries, ) return discoverer.discover(df, target_col) + + +def prune_traps( + df: IntoFrameT, + target_col: str, + *, + correlation_threshold: float = 0.05, + max_retries: int = 2, + model: Optional[str] = None, + host: Optional[str] = None, + verbose: Optional[bool] = None, + **engine_kwargs: Any, +) -> tuple[IntoFrameT, dict[str, Any]]: + """Identify and remove trap features from a DataFrame. + + Trap features are columns of uncorrelated Gaussian noise that + masquerade as valid signals. Detection relies on statistical + distributions and target correlations — column names are ignored. + + Args: + df: Input DataFrame (pandas, Polars, etc.). + target_col: Column name of the prediction target. + correlation_threshold: Absolute correlation threshold. + max_retries: LLM retry budget. + model: Optional Ollama model tag override. + host: Optional Ollama server URL override. + verbose: Enable detailed logging. 
+ **engine_kwargs: Additional arguments forwarded to OllamaEngine. + + Returns: + Tuple of ``(pruned_df, summary)`` where *summary* contains + ``dropped_columns`` and ``verdicts``. + """ + from loclean.extraction.trap_pruner import TrapPruner + + inference_engine = _resolve_engine(model, host, verbose, **engine_kwargs) + + pruner = TrapPruner( + inference_engine=inference_engine, + correlation_threshold=correlation_threshold, + max_retries=max_retries, + ) + return pruner.prune(df, target_col) + + +def recognize_missingness( + df: IntoFrameT, + target_cols: list[str] | None = None, + *, + sample_size: int = 50, + max_retries: int = 3, + model: Optional[str] = None, + host: Optional[str] = None, + verbose: Optional[bool] = None, + **engine_kwargs: Any, +) -> tuple[IntoFrameT, dict[str, Any]]: + """Detect MNAR patterns and encode as boolean feature flags. + + Identifies Missing Not At Random patterns where the probability + of a value being missing depends on other feature values. + + Args: + df: Input DataFrame (pandas, Polars, etc.). + target_cols: Columns to analyse (default: all with nulls). + sample_size: Max null rows to sample per column. + max_retries: LLM retry budget. + model: Optional Ollama model tag override. + host: Optional Ollama server URL override. + verbose: Enable detailed logging. + **engine_kwargs: Additional arguments forwarded to OllamaEngine. + + Returns: + Tuple of ``(augmented_df, summary)`` where *summary* maps + each analysed column to its pattern description. 
+ """ + from loclean.extraction.missingness_recognizer import MissingnessRecognizer + + inference_engine = _resolve_engine(model, host, verbose, **engine_kwargs) + + recognizer = MissingnessRecognizer( + inference_engine=inference_engine, + sample_size=sample_size, + max_retries=max_retries, + ) + return recognizer.recognize(df, target_cols) + + +def audit_leakage( + df: IntoFrameT, + target_col: str, + domain: str = "", + *, + max_retries: int = 2, + sample_n: int = 10, + model: Optional[str] = None, + host: Optional[str] = None, + verbose: Optional[bool] = None, + **engine_kwargs: Any, +) -> tuple[IntoFrameT, dict[str, Any]]: + """Detect and remove target-leaking features. + + Identifies features that contain information generated after the + target event, where P(Y | X_i) ≈ 1. Uses semantic timeline + evaluation via the LLM. + + Args: + df: Input DataFrame (pandas, Polars, etc.). + target_col: Column name of the prediction target. + domain: Brief dataset domain description. + max_retries: LLM retry budget. + sample_n: Sample rows for the prompt. + model: Optional Ollama model tag override. + host: Optional Ollama server URL override. + verbose: Enable detailed logging. + **engine_kwargs: Additional arguments forwarded to OllamaEngine. + + Returns: + Tuple of ``(pruned_df, summary)`` with ``dropped_columns`` + and ``verdicts``. 
+ """ + from loclean.extraction.leakage_auditor import TargetLeakageAuditor + + inference_engine = _resolve_engine(model, host, verbose, **engine_kwargs) + + auditor = TargetLeakageAuditor( + inference_engine=inference_engine, + max_retries=max_retries, + sample_n=sample_n, + ) + return auditor.audit(df, target_col, domain) diff --git a/src/loclean/extraction/__init__.py b/src/loclean/extraction/__init__.py index 64e9d66..460b0b8 100644 --- a/src/loclean/extraction/__init__.py +++ b/src/loclean/extraction/__init__.py @@ -9,18 +9,24 @@ if TYPE_CHECKING: from .feature_discovery import FeatureDiscovery + from .leakage_auditor import TargetLeakageAuditor + from .missingness_recognizer import MissingnessRecognizer from .optimizer import InstructionOptimizer from .oversampler import SemanticOversampler from .resolver import EntityResolver from .shredder import RelationalShredder + from .trap_pruner import TrapPruner __all__ = [ "EntityResolver", "Extractor", "FeatureDiscovery", "InstructionOptimizer", + "MissingnessRecognizer", "RelationalShredder", "SemanticOversampler", + "TargetLeakageAuditor", + "TrapPruner", "extract_dataframe_compiled", ] @@ -28,8 +34,11 @@ "EntityResolver": ".resolver", "FeatureDiscovery": ".feature_discovery", "InstructionOptimizer": ".optimizer", + "MissingnessRecognizer": ".missingness_recognizer", "RelationalShredder": ".shredder", "SemanticOversampler": ".oversampler", + "TargetLeakageAuditor": ".leakage_auditor", + "TrapPruner": ".trap_pruner", } diff --git a/src/loclean/extraction/feature_discovery.py b/src/loclean/extraction/feature_discovery.py index 6aaa939..5ddf5b6 100644 --- a/src/loclean/extraction/feature_discovery.py +++ b/src/loclean/extraction/feature_discovery.py @@ -103,21 +103,37 @@ def discover( return result.to_native() # type: ignore[no-any-return,return-value] source = self._propose_features(state) - fn = self._compile_function(source) - sample_rows = state["sample_rows"] - ok, error = self._verify_function(fn, sample_rows, 
self.timeout_s) - retries = 0 - while not ok and retries < self.max_retries: - source = self._repair_function(source, error, state) + + try: fn = self._compile_function(source) ok, error = self._verify_function(fn, sample_rows, self.timeout_s) + except ValueError as exc: + ok, error = False, str(exc) + + retries = 0 + while not ok and retries < self.max_retries: retries += 1 + logger.warning( + f"[yellow]⚠[/yellow] Retrying code generation " + f"({retries}/{self.max_retries}): {error}" + ) + source = self._repair_function(source, error, state) + try: + fn = self._compile_function(source) + ok, error = self._verify_function(fn, sample_rows, self.timeout_s) + except ValueError as exc: + ok, error = False, str(exc) if not ok: logger.warning( - f"[yellow]⚠[/yellow] Feature generation failed after " - f"{self.max_retries} retries: {error} — returning original DataFrame" + f"[yellow]⚠[/yellow] The model could not generate valid Python " + f"code after {self.max_retries} retries. This is not a library " + f"bug — smaller models (e.g. phi3) sometimes produce syntax " + f"errors or invalid logic. 
Returning the original DataFrame.\n" + f" [dim]Last error: {error}[/dim]\n" + f" [dim]Tip: try a larger model " + f"(model='qwen2.5-coder:7b') or increase max_retries.[/dim]" ) return df @@ -199,11 +215,28 @@ def _propose_features(self, state: dict[str, Any]) -> str: "maximise mutual information I(X_new; Y) with the target.\n\n" "Write a pure Python function with this exact signature:\n\n" "def generate_features(row: dict) -> dict:\n\n" - "The function must:\n" + "EXAMPLE (for a different dataset with columns " + "'age', 'income', 'debt'):\n\n" + "import math\n\n" + "def generate_features(row: dict) -> dict:\n" + " result = {}\n" + " try:\n" + " result['debt_to_income'] = " + "row['debt'] / row['income'] if row['income'] else None\n" + " except Exception:\n" + " result['debt_to_income'] = None\n" + " try:\n" + " result['log_income'] = " + "math.log(row['income']) if row['income'] and " + "row['income'] > 0 else None\n" + " except Exception:\n" + " result['log_income'] = None\n" + " return result\n\n" + "Now write yours for the dataset above. 
The function must:\n" "- Accept a dict of column_name: value pairs\n" f"- Return a dict with exactly {self.n_features} new " "key-value pairs (the new feature names and values)\n" - "- Use ONLY standard library modules (math, etc.)\n" + "- Use ONLY standard library modules (math, statistics, operator)\n" "- Wrap each calculation in try/except, defaulting to " "None on failure\n" "- Use descriptive feature names like 'ratio_a_b' or " @@ -213,12 +246,7 @@ def _propose_features(self, state: dict[str, Any]) -> str: ) raw = self.inference_engine.generate(prompt) - source = str(raw).strip() - if source.startswith("```"): - lines = source.split("\n") - lines = [line for line in lines if not line.strip().startswith("```")] - source = "\n".join(lines) - return source + return str(raw).strip() # ------------------------------------------------------------------ # Compilation @@ -230,6 +258,10 @@ def _compile_function( ) -> Callable[[dict[str, Any]], dict[str, Any]]: """Compile source code in a restricted sandbox. + Applies deterministic sanitization before compilation to fix + common LLM output artifacts (markdown fences, non-ASCII + operators, invalid literals, etc.). + Args: source: Python source containing ``generate_features``. @@ -240,8 +272,13 @@ def _compile_function( ValueError: If compilation fails or function not found. 
""" from loclean.utils.sandbox import compile_sandboxed + from loclean.utils.source_sanitizer import sanitize_source - return compile_sandboxed(source, "generate_features", ["math"]) + return compile_sandboxed( + sanitize_source(source), + "generate_features", + ["math", "statistics", "operator"], + ) # ------------------------------------------------------------------ # Verification @@ -312,12 +349,7 @@ def _repair_function( ) raw = self.inference_engine.generate(prompt) - repaired = str(raw).strip() - if repaired.startswith("```"): - lines = repaired.split("\n") - lines = [line for line in lines if not line.strip().startswith("```")] - repaired = "\n".join(lines) - return repaired + return str(raw).strip() # ------------------------------------------------------------------ # Application diff --git a/src/loclean/extraction/leakage_auditor.py b/src/loclean/extraction/leakage_auditor.py new file mode 100644 index 0000000..67bdb3d --- /dev/null +++ b/src/loclean/extraction/leakage_auditor.py @@ -0,0 +1,240 @@ +"""Semantic target leakage detection via LLM-driven timeline evaluation. + +Identifies features that mathematically or logically imply the target +variable — i.e. columns containing information generated *after* the +target event occurs. In a deterministic leakage scenario: + + P(Y | X_i) ≈ 1 + +The generative engine acts as a semantic auditor, catching logical +leakage that basic statistical tests miss by evaluating the causal +timeline of each feature relative to the target outcome. +""" + +from __future__ import annotations + +import json +import logging +from typing import TYPE_CHECKING, Any + +import narwhals as nw + +from loclean.utils.logging import configure_module_logger + +if TYPE_CHECKING: + from narwhals.typing import IntoFrameT + + from loclean.inference.base import InferenceEngine + +logger = configure_module_logger(__name__, level=logging.INFO) + + +class TargetLeakageAuditor: + """Detect and remove features that leak the target variable. 
+ + For each feature column the auditor prompts the LLM with the + dataset domain description and a representative sample, asking + it to evaluate whether the feature could only be known *after* + the target outcome is determined. + + Args: + inference_engine: Ollama (or compatible) engine. + max_retries: LLM generation retry budget. + sample_n: Number of sample rows to include in the prompt. + """ + + def __init__( + self, + inference_engine: InferenceEngine, + *, + max_retries: int = 2, + sample_n: int = 10, + ) -> None: + self.inference_engine = inference_engine + self.max_retries = max_retries + self.sample_n = sample_n + + def audit( + self, + df: IntoFrameT, + target_col: str, + domain: str = "", + ) -> tuple[IntoFrameT, dict[str, Any]]: + """Audit features for target leakage and drop offenders. + + Args: + df: Input DataFrame (pandas, Polars, etc.). + target_col: Column name of the prediction target. + domain: Brief text description of the dataset domain + (e.g. ``"hospital readmission prediction"``). + + Returns: + Tuple of ``(pruned_df, summary)`` where *summary* + contains ``dropped_columns`` and per-column ``verdicts``. 
+ """ + df_nw = nw.from_native(df) # type: ignore[type-var] + + if target_col not in df_nw.columns: + raise ValueError(f"Target column '{target_col}' not found") + + feature_cols = [c for c in df_nw.columns if c != target_col] + if not feature_cols: + logger.info("No feature columns to audit.") + return df, {"dropped_columns": [], "verdicts": []} + + state = self._extract_state(df_nw, target_col, feature_cols) + prompt = self._build_prompt(state, domain) + verdicts = self._evaluate_with_llm(prompt) + + leaked = [v["column"] for v in verdicts if v.get("is_leakage")] + valid_leaked = [c for c in leaked if c in feature_cols] + + summary: dict[str, Any] = { + "dropped_columns": valid_leaked, + "verdicts": verdicts, + } + + if valid_leaked: + logger.info( + "Dropping %d leaked feature(s): %s", + len(valid_leaked), + valid_leaked, + ) + try: + pruned_nw = df_nw.drop(valid_leaked) + return nw.to_native(pruned_nw), summary # type: ignore[type-var] + except Exception as exc: + logger.warning("Failed to drop columns: %s", exc) + return df, summary + + logger.info("No target leakage detected.") + return df, summary + + # ------------------------------------------------------------------ + # State extraction + # ------------------------------------------------------------------ + + @staticmethod + def _extract_state( + df_nw: nw.DataFrame[Any], + target_col: str, + feature_cols: list[str], + sample_n: int = 10, + ) -> dict[str, Any]: + """Build structural metadata for the LLM prompt. + + Args: + df_nw: Narwhals DataFrame. + target_col: Target variable column. + feature_cols: Feature column names. + sample_n: Number of sample rows. + + Returns: + Dict with ``target_col``, ``features``, ``dtypes``, + and ``sample_rows``. 
+ """ + n = min(df_nw.shape[0], sample_n) + sampled = df_nw.head(n) + sample_rows = sampled.rows(named=True) + + dtypes = {col: str(df_nw[col].dtype) for col in feature_cols} + + return { + "target_col": target_col, + "features": feature_cols, + "dtypes": dtypes, + "sample_rows": sample_rows, # type: ignore[dict-item] + } + + # ------------------------------------------------------------------ + # Prompt construction + # ------------------------------------------------------------------ + + @staticmethod + def _build_prompt( + state: dict[str, Any], + domain: str, + ) -> str: + """Build the LLM prompt for timeline evaluation.""" + target = state["target_col"] + features = state["features"] + dtypes = state["dtypes"] + sample_str = json.dumps(state["sample_rows"][:10], indent=2, default=str) + + domain_line = f"Dataset domain: {domain}\n" if domain else "" + + feature_info = "\n".join( + f" - {f} (dtype: {dtypes.get(f, 'unknown')})" for f in features + ) + + return ( + "You are a machine learning auditor specialising in data " + "leakage detection.\n\n" + f"{domain_line}" + f"Target variable: '{target}'\n\n" + f"Feature columns:\n{feature_info}\n\n" + f"Sample rows:\n{sample_str}\n\n" + "Task: For each feature column, evaluate whether it could " + "constitute **target leakage** — meaning the feature contains " + "information that would only be available AFTER the target " + "outcome is determined.\n\n" + "Consider:\n" + "- Temporal ordering: was this feature generated before or " + "after the target event?\n" + "- Semantic meaning: does the feature directly encode or " + "trivially derive from the target?\n" + "- Statistical signal: extremely high correlation may " + "indicate leakage, not just a good predictor.\n\n" + "Output ONLY a JSON array. For each feature, output an " + "object with exactly three keys:\n" + '- "column": the feature name\n' + '- "is_leakage": boolean\n' + '- "reason": brief explanation\n\n' + "Output ONLY the JSON array, no other text." 
+ ) + + # ------------------------------------------------------------------ + # LLM evaluation + # ------------------------------------------------------------------ + + def _evaluate_with_llm( + self, + prompt: str, + ) -> list[dict[str, Any]]: + """Send the prompt and parse the leakage verdicts.""" + for attempt in range(1, self.max_retries + 1): + try: + raw = str(self.inference_engine.generate(prompt)).strip() + return self._parse_verdict(raw) + except (json.JSONDecodeError, ValueError, KeyError) as exc: + logger.warning( + "LLM verdict parsing failed (attempt %d/%d): %s", + attempt, + self.max_retries, + exc, + ) + logger.warning("Could not parse LLM verdicts — keeping all columns.") + return [] + + @staticmethod + def _parse_verdict(response: str) -> list[dict[str, Any]]: + """Parse the JSON verdict from the LLM response.""" + text = response.strip() + start = text.find("[") + end = text.rfind("]") + if start == -1 or end == -1: + raise ValueError("No JSON array found in LLM response") + + items: list[dict[str, Any]] = json.loads(text[start : end + 1]) + verdicts: list[dict[str, Any]] = [] + + for item in items: + verdicts.append( + { + "column": str(item["column"]), + "is_leakage": bool(item.get("is_leakage", False)), + "reason": str(item.get("reason", "")), + } + ) + + return verdicts diff --git a/src/loclean/extraction/missingness_recognizer.py b/src/loclean/extraction/missingness_recognizer.py new file mode 100644 index 0000000..e6ad695 --- /dev/null +++ b/src/loclean/extraction/missingness_recognizer.py @@ -0,0 +1,307 @@ +"""Missingness pattern recognition via LLM-driven MNAR detection. + +Identifies Missing Not At Random (MNAR) patterns where the probability +of a value being missing in feature X depends on the value of feature Y: + + P(X_missing | Y) ≠ P(X_missing) + +Uses Narwhals for backend-agnostic null analysis and an InferenceEngine +to infer structural correlations from data samples. 
Detected patterns +are encoded as new boolean feature columns. +""" + +from __future__ import annotations + +import json +import logging +from typing import TYPE_CHECKING, Any, Callable + +import narwhals as nw + +from loclean.utils.logging import configure_module_logger + +if TYPE_CHECKING: + from narwhals.typing import IntoFrameT + + from loclean.inference.base import InferenceEngine + +logger = configure_module_logger(__name__, level=logging.INFO) + +_SUFFIX = "_mnar" + + +class MissingnessRecognizer: + """Detect MNAR patterns and encode them as boolean feature flags. + + For each column containing nulls the recognizer: + + 1. Samples rows where the column is null alongside other features. + 2. Prompts the LLM to identify structural correlations. + 3. Compiles the LLM-generated ``encode_missingness`` function in a + sandbox. + 4. Applies the function across the DataFrame to create a boolean + ``{col}_mnar`` column. + + Args: + inference_engine: Ollama (or compatible) engine. + sample_size: Maximum null rows to sample per column. + max_retries: LLM code-generation retry budget. + timeout_s: Per-row execution timeout in seconds. + """ + + def __init__( + self, + inference_engine: InferenceEngine, + *, + sample_size: int = 50, + max_retries: int = 3, + timeout_s: float = 2.0, + ) -> None: + self.inference_engine = inference_engine + self.sample_size = sample_size + self.max_retries = max_retries + self.timeout_s = timeout_s + + def recognize( + self, + df: IntoFrameT, + target_cols: list[str] | None = None, + ) -> tuple[IntoFrameT, dict[str, Any]]: + """Detect MNAR patterns and add boolean feature columns. + + Args: + df: Input DataFrame (pandas, Polars, etc.). + target_cols: Columns to analyse for missingness. If + ``None``, all columns containing nulls are evaluated. + + Returns: + Tuple of ``(augmented_df, summary)`` where *summary* + maps each analysed column to its pattern description + or ``None`` if no pattern was found. 
+ """ + df_nw = nw.from_native(df) # type: ignore[type-var] + + null_cols = self._find_null_columns(df_nw) + + if target_cols is not None: + null_cols = [c for c in target_cols if c in null_cols] + + if not null_cols: + logger.info("No columns with null values to analyse.") + return df, {"patterns": {}} + + all_cols = df_nw.columns + patterns: dict[str, Any] = {} + new_columns: dict[str, list[bool]] = {} + + for col in null_cols: + context_cols = [c for c in all_cols if c != col] + sample = self._sample_null_context(df_nw, col, context_cols) + + if not sample: + patterns[col] = None + continue + + prompt = self._build_prompt(col, context_cols, sample) + fn = self._generate_and_compile(prompt) + + if fn is None: + patterns[col] = None + continue + + ok, error = self._verify_encoder(fn, sample) + if not ok: + logger.warning("Encoder for '%s' failed verification: %s", col, error) + patterns[col] = None + continue + + flags = self._apply_encoder(df_nw, fn) + col_name = f"{col}{_SUFFIX}" + new_columns[col_name] = flags + patterns[col] = { + "encoded_as": col_name, + "null_count": sum(1 for v in df_nw[col].to_list() if v is None), + "pattern_flags_true": sum(flags), + } + logger.info( + "Encoded MNAR pattern for '%s' → '%s' (%d flagged)", + col, + col_name, + sum(flags), + ) + + if new_columns: + native_ns = nw.get_native_namespace(df_nw) + rows_data: dict[str, list[Any]] = { + c: df_nw[c].to_list() for c in df_nw.columns + } + rows_data.update(new_columns) + result_nw = nw.from_dict(rows_data, backend=native_ns) + return nw.to_native(result_nw), {"patterns": patterns} + + return df, {"patterns": patterns} + + # ------------------------------------------------------------------ + # Null detection + # ------------------------------------------------------------------ + + @staticmethod + def _find_null_columns(df_nw: nw.DataFrame[Any]) -> list[str]: + """Return column names that contain at least one null.""" + return [col for col in df_nw.columns if df_nw[col].null_count() 
> 0] + + # ------------------------------------------------------------------ + # Sampling + # ------------------------------------------------------------------ + + @staticmethod + def _sample_null_context( + df_nw: nw.DataFrame[Any], + null_col: str, + context_cols: list[str], + max_rows: int = 50, + ) -> list[dict[str, Any]]: + """Extract rows where *null_col* is null with context values. + + Returns a list of dicts, each containing the context column + values for a row where *null_col* is missing. + """ + null_mask = df_nw[null_col].is_null() + null_rows = df_nw.filter(null_mask) + + if null_rows.shape[0] == 0: + return [] + + n = min(null_rows.shape[0], max_rows) + sampled = null_rows.head(n) + + select_cols = [c for c in context_cols if c in sampled.columns] + if not select_cols: + return [] + + return sampled.select(select_cols).rows(named=True) # type: ignore[return-value] + + # ------------------------------------------------------------------ + # Prompt construction + # ------------------------------------------------------------------ + + @staticmethod + def _build_prompt( + null_col: str, + context_cols: list[str], + sample_rows: list[dict[str, Any]], + ) -> str: + """Build the LLM prompt for pattern inference.""" + sample_str = json.dumps(sample_rows[:20], indent=2, default=str) + + return ( + "You are a data scientist analysing missing data patterns.\n\n" + f"Column '{null_col}' has missing values. 
Below are sample rows " + "where this column IS NULL, showing the values of the other " + "columns:\n\n" + f"Context columns: {context_cols}\n\n" + f"Sample rows (where '{null_col}' is null):\n{sample_str}\n\n" + "Task: Identify if there is a structural pattern that predicts " + f"when '{null_col}' is missing based on other column values.\n\n" + "Write a pure Python function with this exact signature:\n\n" + "def encode_missingness(row: dict) -> bool:\n" + " ...\n\n" + "The function receives a dict of ALL column values for a row " + "(including the target column) and returns True if the " + "missingness pattern is detected.\n\n" + "Rules:\n" + "- Use ONLY standard library modules (math, statistics, operator)\n" + "- Wrap logic in try/except returning False on failure\n" + "- Return a single boolean value\n" + "- Do NOT use markdown fences, comments, or prose\n" + "- Output ONLY the function code, nothing else\n\n" + "Example:\n" + "def encode_missingness(row: dict) -> bool:\n" + " try:\n" + " return row.get('category') == 'electronics' " + "and row.get('price', 0) > 500\n" + " except Exception:\n" + " return False\n" + ) + + # ------------------------------------------------------------------ + # Code generation + compilation + # ------------------------------------------------------------------ + + def _generate_and_compile( + self, + prompt: str, + ) -> Callable[[dict[str, Any]], bool] | None: + """Generate, sanitize, and compile the encoder function.""" + import re + + from loclean.utils.sandbox import compile_sandboxed + + for attempt in range(1, self.max_retries + 1): + try: + raw = str(self.inference_engine.generate(prompt)).strip() + source = re.sub(r"```(?:python)?\s*\n?", "", raw).strip() + fn = compile_sandboxed( + source, + "encode_missingness", + ["math", "statistics", "operator"], + ) + return fn # type: ignore[return-value] + except (ValueError, SyntaxError) as exc: + logger.warning( + "⚠ Code generation failed (attempt %d/%d): %s", + attempt, + 
self.max_retries, + exc, + ) + + logger.warning( + "Could not compile encoder after %d retries.", + self.max_retries, + ) + return None + + # ------------------------------------------------------------------ + # Verification + # ------------------------------------------------------------------ + + @staticmethod + def _verify_encoder( + fn: Callable[[dict[str, Any]], bool], + sample_rows: list[dict[str, Any]], + ) -> tuple[bool, str]: + """Test the encoder on sample rows.""" + from loclean.utils.sandbox import run_with_timeout + + for row in sample_rows[:5]: + result, error = run_with_timeout(fn, (row,), 2.0) + if error: + return False, f"Execution error: {error}" + if not isinstance(result, bool): + return False, f"Expected bool, got {type(result).__name__}" + + return True, "" + + # ------------------------------------------------------------------ + # Application + # ------------------------------------------------------------------ + + def _apply_encoder( + self, + df_nw: nw.DataFrame[Any], + fn: Callable[[dict[str, Any]], bool], + ) -> list[bool]: + """Apply the encoder across all rows.""" + from loclean.utils.sandbox import run_with_timeout + + rows: list[dict[str, Any]] = df_nw.rows(named=True) # type: ignore[assignment] + flags: list[bool] = [] + + for row in rows: + result, error = run_with_timeout(fn, (row,), self.timeout_s) + if error or not isinstance(result, bool): + flags.append(False) + else: + flags.append(result) + + return flags diff --git a/src/loclean/extraction/shredder.py b/src/loclean/extraction/shredder.py index eabdcd1..432e7d8 100644 --- a/src/loclean/extraction/shredder.py +++ b/src/loclean/extraction/shredder.py @@ -146,22 +146,40 @@ def shred( return self._separate_tables(results, schema, native_ns) source = self._generate_extractor(schema, samples) - extract_fn = self._compile_function(source) - ok, error = self._verify_function(extract_fn, samples, schema, self.timeout_s) - retries = 0 - while not ok and retries < self.max_retries: - 
source = self._repair_function(source, error, samples) + try: extract_fn = self._compile_function(source) ok, error = self._verify_function( extract_fn, samples, schema, self.timeout_s ) + except ValueError as exc: + ok, error = False, str(exc) + + retries = 0 + while not ok and retries < self.max_retries: retries += 1 + logger.warning( + f"[yellow]⚠[/yellow] Retrying code generation " + f"({retries}/{self.max_retries}): {error}" + ) + source = self._repair_function(source, error, samples) + try: + extract_fn = self._compile_function(source) + ok, error = self._verify_function( + extract_fn, samples, schema, self.timeout_s + ) + except ValueError as exc: + ok, error = False, str(exc) if not ok: logger.warning( - f"[yellow]⚠[/yellow] Code generation failed after " - f"{self.max_retries} retries: {error} — returning empty result" + f"[yellow]⚠[/yellow] The model could not generate valid Python " + f"code after {self.max_retries} retries. This is not a library " + f"bug — smaller models (e.g. phi3) sometimes produce syntax " + f"errors or invalid logic. 
Returning empty result.\n" + f" [dim]Last error: {error}[/dim]\n" + f" [dim]Tip: try a larger model " + f"(model='qwen2.5-coder:7b') or increase max_retries.[/dim]" ) return {} @@ -310,8 +328,26 @@ def _generate_extractor( f"Target tables:\n{table_specs}\n\n" "Sample log entries:\n" f"{json.dumps(samples[:5], ensure_ascii=False)}\n\n" + "EXAMPLE (for a different log format):\n\n" + "import re\n\n" + "def extract_relations(log: str) -> dict[str, dict]:\n" + " result = {}\n" + " try:\n" + " m = re.match(" + "r'(\\S+) (\\S+) \\[(.*?)\\] \"(\\S+)\"', log)\n" + " if m:\n" + " result['requests'] = {\n" + " 'ip': m.group(1),\n" + " 'method': m.group(4),\n" + " }\n" + " except Exception:\n" + " result['requests'] = " + "{'ip': '', 'method': ''}\n" + " return result\n\n" + "Now write yours for the log format above.\n\n" "Rules:\n" - "- Use ONLY standard library modules (re, string, etc.)\n" + "- Use ONLY standard library modules (re, json, " + "datetime, collections)\n" "- Wrap parsing logic in try/except blocks\n" "- Return empty strings for fields that cannot be parsed\n" "- Do NOT import any third-party libraries\n\n" @@ -319,12 +355,7 @@ def _generate_extractor( ) raw = self.inference_engine.generate(prompt) - source = str(raw).strip() - if source.startswith("```"): - lines = source.split("\n") - lines = [line for line in lines if not line.strip().startswith("```")] - source = "\n".join(lines) - return source + return str(raw).strip() @staticmethod def _compile_function( @@ -332,6 +363,10 @@ def _compile_function( ) -> Callable[[str], dict[str, dict[str, Any]]]: """Compile source code in a restricted sandbox. + Applies deterministic sanitization before compilation to fix + common LLM output artifacts (markdown fences, non-ASCII + operators, invalid literals, etc.). + Args: source: Python source containing ``extract_relations``. @@ -342,9 +377,10 @@ def _compile_function( ValueError: If compilation fails or function not found. 
""" from loclean.utils.sandbox import compile_sandboxed + from loclean.utils.source_sanitizer import sanitize_source return compile_sandboxed( - source, + sanitize_source(source), "extract_relations", ["re", "json", "datetime", "collections"], ) @@ -414,12 +450,7 @@ def _repair_function( ) raw = self.inference_engine.generate(prompt) - repaired = str(raw).strip() - if repaired.startswith("```"): - lines = repaired.split("\n") - lines = [line for line in lines if not line.strip().startswith("```")] - repaired = "\n".join(lines) - return repaired + return str(raw).strip() # ------------------------------------------------------------------ # Phase 3: Full execution + separation diff --git a/src/loclean/extraction/trap_pruner.py b/src/loclean/extraction/trap_pruner.py new file mode 100644 index 0000000..376b933 --- /dev/null +++ b/src/loclean/extraction/trap_pruner.py @@ -0,0 +1,343 @@ +"""Automated trap feature pruning via statistical profiling and LLM verification. + +Identifies columns that look like valid signals but are actually +uncorrelated Gaussian noise (trap features). Uses Narwhals for +backend-agnostic statistical profiling and an ``InferenceEngine`` +for generative verification. 
+""" + +from __future__ import annotations + +import json +import logging +from typing import TYPE_CHECKING, Any + +import narwhals as nw + +from loclean.utils.logging import configure_module_logger + +if TYPE_CHECKING: + from narwhals.typing import IntoFrameT + + from loclean.inference.base import InferenceEngine + +logger = configure_module_logger(__name__, level=logging.INFO) + + +class _ColumnProfile: + """Statistical profile for a single numeric column.""" + + __slots__ = ( + "name", + "mean", + "std", + "variance", + "skewness", + "kurtosis", + "min_val", + "max_val", + "corr_with_target", + ) + + def __init__( + self, + name: str, + mean: float, + std: float, + variance: float, + skewness: float, + kurtosis: float, + min_val: float, + max_val: float, + corr_with_target: float, + ) -> None: + self.name = name + self.mean = mean + self.std = std + self.variance = variance + self.skewness = skewness + self.kurtosis = kurtosis + self.min_val = min_val + self.max_val = max_val + self.corr_with_target = corr_with_target + + def to_dict(self) -> dict[str, Any]: + """Serialise profile to a plain dictionary.""" + return { + "name": self.name, + "mean": self.mean, + "std": self.std, + "variance": self.variance, + "skewness": self.skewness, + "kurtosis": self.kurtosis, + "min": self.min_val, + "max": self.max_val, + "corr_with_target": self.corr_with_target, + } + + +class TrapPruner: + """Identify and remove trap features from a DataFrame. + + Trap features are columns of uncorrelated Gaussian noise that + masquerade as valid signals. Detection relies entirely on + statistical distributions and target correlations — column names + are deliberately ignored. + + Args: + inference_engine: Ollama (or compatible) engine for verification. + correlation_threshold: Absolute correlation below which a + column is considered uncorrelated. Default ``0.05``. + max_retries: LLM generation retry budget. 
+ """ + + def __init__( + self, + inference_engine: InferenceEngine, + *, + correlation_threshold: float = 0.05, + max_retries: int = 2, + ) -> None: + self.inference_engine = inference_engine + self.correlation_threshold = correlation_threshold + self.max_retries = max_retries + + def prune( + self, + df: IntoFrameT, + target_col: str, + ) -> tuple[IntoFrameT, dict[str, Any]]: + """Profile, verify, and drop trap features. + + Args: + df: Input DataFrame (pandas, Polars, etc.). + target_col: Column name of the prediction target. + + Returns: + Tuple of ``(pruned_df, summary)`` where *summary* contains + ``dropped_columns`` (list of removed names) and + ``verdicts`` (per-column LLM reasoning). + """ + df_nw = nw.from_native(df) # type: ignore[type-var] + + if target_col not in df_nw.columns: + raise ValueError(f"Target column '{target_col}' not found") + + numeric_cols = [ + c for c in df_nw.columns if c != target_col and df_nw[c].dtype.is_numeric() + ] + + if not numeric_cols: + logger.info("No numeric feature columns to evaluate.") + return df, {"dropped_columns": [], "verdicts": []} + + profiles = self._profile_columns(df_nw, target_col, numeric_cols) + + col_map, prompt = self._build_prompt(profiles) + + verdicts = self._verify_with_llm(prompt, col_map) + + trap_cols = [v["column"] for v in verdicts if v.get("is_trap")] + + summary: dict[str, Any] = { + "dropped_columns": trap_cols, + "verdicts": verdicts, + } + + if trap_cols: + logger.info( + "Dropping %d trap feature(s): %s", + len(trap_cols), + trap_cols, + ) + pruned_nw = df_nw.drop(trap_cols) + return nw.to_native(pruned_nw), summary # type: ignore[type-var] + + logger.info("No trap features detected.") + return df, summary + + # ------------------------------------------------------------------ + # Statistical profiling + # ------------------------------------------------------------------ + + @staticmethod + def _profile_columns( + df_nw: nw.DataFrame[Any], + target_col: str, + numeric_cols: list[str], + 
) -> list[_ColumnProfile]: + """Compute distribution statistics for each numeric column. + + All operations use the Narwhals interface. Division-by-zero + and other math errors are caught per-column. + """ + n = df_nw.shape[0] + if n < 2: + return [] + + target_series = df_nw[target_col].cast(nw.Float64) + target_mean = target_series.mean() + target_std = target_series.std() + + profiles: list[_ColumnProfile] = [] + + for col in numeric_cols: + try: + series = df_nw[col].cast(nw.Float64) + col_mean = series.mean() + col_std = series.std() + + diffs = series - col_mean + variance = (diffs * diffs).mean() + + if col_std and col_std > 0 and target_std and target_std > 0: + corr = float( + ((series - col_mean) * (target_series - target_mean)).mean() + / (col_std * target_std) + ) + else: + corr = 0.0 + + if col_std and col_std > 0: + skewness = float((diffs**3).mean() / (col_std**3)) + kurtosis = float((diffs**4).mean() / (col_std**4)) - 3.0 + else: + skewness = 0.0 + kurtosis = 0.0 + + profiles.append( + _ColumnProfile( + name=col, + mean=float(col_mean) if col_mean is not None else 0.0, + std=float(col_std) if col_std is not None else 0.0, + variance=float(variance) if variance is not None else 0.0, + skewness=skewness, + kurtosis=kurtosis, + min_val=float(series.min()), + max_val=float(series.max()), + corr_with_target=corr, + ) + ) + except (ZeroDivisionError, ValueError, OverflowError): + profiles.append( + _ColumnProfile( + name=col, + mean=0.0, + std=0.0, + variance=0.0, + skewness=0.0, + kurtosis=0.0, + min_val=0.0, + max_val=0.0, + corr_with_target=0.0, + ) + ) + + return profiles + + # ------------------------------------------------------------------ + # Prompt construction (anonymised) + # ------------------------------------------------------------------ + + @staticmethod + def _build_prompt( + profiles: list[_ColumnProfile], + ) -> tuple[dict[str, str], str]: + """Build the LLM verification prompt with anonymised column IDs. 
+ + Returns: + Tuple of ``(col_map, prompt_text)`` where *col_map* maps + ``"col_0"`` → real column name. + """ + col_map: dict[str, str] = {} + lines: list[str] = [] + + for i, p in enumerate(profiles): + anon = f"col_{i}" + col_map[anon] = p.name + + lines.append( + f"Column {anon}: " + f"mean={p.mean:.4f}, std={p.std:.4f}, " + f"variance={p.variance:.4f}, " + f"skewness={p.skewness:.4f}, kurtosis={p.kurtosis:.4f}, " + f"min={p.min_val:.4f}, max={p.max_val:.4f}, " + f"corr_with_target={p.corr_with_target:.4f}" + ) + + profile_block = "\n".join(lines) + + prompt = ( + "You are a statistical analyst. Below are the statistical profiles " + "of several numeric columns from a dataset. Each column is " + "identified only by an anonymous ID (column names are hidden).\n\n" + f"{profile_block}\n\n" + "A **trap feature** is a column that:\n" + "1. Exhibits a distribution close to standard Gaussian " + "(skewness ≈ 0, kurtosis ≈ 0, i.e. excess kurtosis near zero).\n" + "2. Has a correlation with the target variable very close to " + "zero (|corr| < 0.05).\n\n" + "Analyse each column and output ONLY a JSON array. " + "For each column output an object with exactly three keys:\n" + '- "column": the anonymous ID (e.g. "col_0")\n' + '- "is_trap": boolean\n' + '- "reason": brief explanation\n\n' + "Output ONLY the JSON array, no other text." 
+ ) + + return col_map, prompt + + # ------------------------------------------------------------------ + # LLM verification + # ------------------------------------------------------------------ + + def _verify_with_llm( + self, + prompt: str, + col_map: dict[str, str], + ) -> list[dict[str, Any]]: + """Send the prompt to the LLM and parse the verdict.""" + for attempt in range(1, self.max_retries + 1): + try: + raw = self.inference_engine.generate(prompt) + return self._parse_verdict(str(raw).strip(), col_map) + except (json.JSONDecodeError, ValueError, KeyError) as exc: + logger.warning( + "LLM verdict parsing failed (attempt %d/%d): %s", + attempt, + self.max_retries, + exc, + ) + logger.warning("Could not parse LLM verdicts — keeping all columns.") + return [ + {"column": real, "is_trap": False, "reason": "LLM parse failure"} + for real in col_map.values() + ] + + @staticmethod + def _parse_verdict( + response: str, + col_map: dict[str, str], + ) -> list[dict[str, Any]]: + """Parse the JSON verdict and map anonymous IDs back to real names.""" + text = response.strip() + start = text.find("[") + end = text.rfind("]") + if start == -1 or end == -1: + raise ValueError("No JSON array found in LLM response") + + items: list[dict[str, Any]] = json.loads(text[start : end + 1]) + verdicts: list[dict[str, Any]] = [] + + for item in items: + anon_id = item["column"] + real_name = col_map.get(anon_id, anon_id) + verdicts.append( + { + "column": real_name, + "is_trap": bool(item.get("is_trap", False)), + "reason": str(item.get("reason", "")), + } + ) + + return verdicts diff --git a/src/loclean/inference/model_manager.py b/src/loclean/inference/model_manager.py index eb09f9b..559fa51 100644 --- a/src/loclean/inference/model_manager.py +++ b/src/loclean/inference/model_manager.py @@ -22,6 +22,8 @@ logger = configure_module_logger(__name__, level=logging.INFO) +_verified_models: set[str] = set() + def model_exists(client: Any, model: str) -> bool: """Check whether *model* 
is already available in the local Ollama registry. @@ -38,9 +40,14 @@ def model_exists(client: Any, model: str) -> bool: except Exception: return False - models = response.get("models", []) + models = getattr(response, "models", None) + if models is None: + models = response.get("models", []) if isinstance(response, dict) else [] + for entry in models: - name: str = entry.get("name", "") + name: str = getattr(entry, "model", None) or ( + entry.get("name", "") if isinstance(entry, dict) else "" + ) if name == model or name.startswith(f"{model}:"): return True return False @@ -64,11 +71,15 @@ def ensure_model( Raises: RuntimeError: If the pull fails or encounters an error status. """ + if model in _verified_models: + return + if model_exists(client, model): logger.info( f"[green]✓[/green] Model [bold cyan]{model}[/bold cyan] " "is already available." ) + _verified_models.add(model) return if console is None: @@ -114,3 +125,4 @@ def ensure_model( console.print( f"[green]✓[/green] Model [bold cyan]{model}[/bold cyan] pulled successfully." ) + _verified_models.add(model) diff --git a/src/loclean/utils/sandbox.py b/src/loclean/utils/sandbox.py index 632020c..fcf1025 100644 --- a/src/loclean/utils/sandbox.py +++ b/src/loclean/utils/sandbox.py @@ -93,9 +93,11 @@ def compile_sandboxed( The execution environment has: * ``__builtins__`` replaced by a curated safe subset (no ``open``, - ``exec``, ``eval``, ``__import__``, ``compile``, ``exit``, - ``quit``, ``input``, ``breakpoint``, ``globals``, ``locals``, - ``vars``, ``dir``). + ``exec``, ``eval``, ``compile``, ``exit``, ``quit``, ``input``, + ``breakpoint``, ``globals``, ``locals``, ``vars``, ``dir``). + * A restricted ``__import__`` that only permits explicitly listed + modules — LLM-generated ``import`` statements work for allowed + modules but raise ``ImportError`` for anything else. * Only explicitly listed standard-library modules injected. 
Args: @@ -110,16 +112,40 @@ def compile_sandboxed( Raises: ValueError: If compilation fails or *fn_name* is not defined. """ - safe_globals: dict[str, Any] = {"__builtins__": _SAFE_BUILTINS.copy()} + allowed = set(allowed_modules or []) + preloaded: dict[str, Any] = {} - for mod_name in allowed_modules or []: + for mod_name in allowed: try: - safe_globals[mod_name] = importlib.import_module(mod_name) + preloaded[mod_name] = importlib.import_module(mod_name) except ImportError: logger.warning( f"[yellow]⚠[/yellow] Module '{mod_name}' not available, skipping" ) + def _restricted_import( + name: str, + globals: Any = None, + locals: Any = None, + fromlist: Any = (), + level: int = 0, + ) -> Any: + root = name.split(".")[0] + if root not in allowed: + raise ImportError( + f"Import of '{name}' is not allowed in the sandbox. " + f"Permitted modules: {sorted(allowed)}" + ) + if root in preloaded: + return preloaded[root] + return importlib.import_module(name) + + builtins = _SAFE_BUILTINS.copy() + builtins["__import__"] = _restricted_import + + safe_globals: dict[str, Any] = {"__builtins__": builtins} + safe_globals.update(preloaded) + try: exec(source, safe_globals) # noqa: S102 except Exception as exc: diff --git a/src/loclean/utils/source_sanitizer.py b/src/loclean/utils/source_sanitizer.py new file mode 100644 index 0000000..20d4940 --- /dev/null +++ b/src/loclean/utils/source_sanitizer.py @@ -0,0 +1,137 @@ +"""Deterministic source-code sanitizer for LLM-generated Python. + +Small models (phi3, etc.) frequently produce output with markdown +fences, prose preambles, non-ASCII operators, and invalid numeric +literals. This module fixes those issues mechanically — no LLM calls +required — before the code reaches ``compile_sandboxed``. +""" + +from __future__ import annotations + +import re + + +def sanitize_source(source: str) -> str: + """Clean up common LLM output artifacts from Python source code. + + Applies a sequence of deterministic transformations: + + 1. 
Strip markdown code fences (````python`` / `````) + 2. Remove prose before the first ``import`` / ``def`` / ``from`` + 3. Remove trailing prose after the last function body + 4. Replace non-ASCII mathematical operators + 5. Fix invalid numeric literals + 6. Strip stray inline backticks + + Args: + source: Raw LLM-generated Python source. + + Returns: + Cleaned source code ready for ``compile_sandboxed``. + """ + source = _strip_markdown_fences(source) + source = _strip_prose(source) + source = _fix_unicode_operators(source) + source = _fix_numeric_literals(source) + source = _strip_backticks(source) + return source + + +def _strip_markdown_fences(source: str) -> str: + """Remove markdown code fences wrapping the code block.""" + lines = source.split("\n") + cleaned: list[str] = [] + for line in lines: + stripped = line.strip() + if stripped.startswith("```"): + continue + cleaned.append(line) + return "\n".join(cleaned) + + +def _strip_prose(source: str) -> str: + """Remove explanatory text before/after the actual code. + + Keeps lines starting from the first ``import``, ``from``, or + ``def`` statement through the end of the last indented block. 
+ """ + lines = source.split("\n") + + start_idx = 0 + for i, line in enumerate(lines): + stripped = line.strip() + if stripped.startswith(("import ", "from ", "def ", "class ")): + start_idx = i + break + + end_idx = len(lines) + for i in range(len(lines) - 1, start_idx - 1, -1): + stripped = lines[i].strip() + if stripped and not _is_prose_line(stripped): + end_idx = i + 1 + break + + return "\n".join(lines[start_idx:end_idx]) + + +def _is_prose_line(line: str) -> bool: + """Heuristic: a line is 'prose' if it looks like natural language.""" + if not line: + return False + if line.startswith(("#", "import ", "from ", "def ", "class ", "return ")): + return False + if line[0] in (" ", "\t", "@"): + return False + words = line.split() + if len(words) >= 4 and not any(c in line for c in ("=", "(", ")", "[", "]", ":")): + return True + return False + + +_UNICODE_MAP: dict[str, str] = { + "\u00d7": "*", # × + "\u00f7": "/", # ÷ + "\u2212": "-", # − (minus sign) + "\u2013": "-", # – (en dash) + "\u2014": "-", # — (em dash) + "\u2018": "'", # ' + "\u2019": "'", # ' + "\u201c": '"', # " + "\u201d": '"', # " + "\u2264": "<=", # ≤ + "\u2265": ">=", # ≥ + "\u2260": "!=", # ≠ +} + + +def _fix_unicode_operators(source: str) -> str: + """Replace non-ASCII mathematical and typographic characters.""" + for char, replacement in _UNICODE_MAP.items(): + source = source.replace(char, replacement) + return source + + +def _fix_numeric_literals(source: str) -> str: + """Fix invalid numeric literals commonly produced by small models. + + Patterns handled: + - ``0b2``, ``0b3`` etc. 
(invalid binary digits) → decimal + - Trailing currency/unit symbols on numbers (``100$``, ``50€``) + """ + source = re.sub( + r"\b0b([2-9]\d*)\b", + lambda m: m.group(1), + source, + ) + source = re.sub( + r"(\d+\.?\d*)[€$£%]+", + r"\1", + source, + ) + return source + + +def _strip_backticks(source: str) -> str: + """Remove stray inline backticks wrapping expressions.""" + source = re.sub(r"`([^`\n]+)`", r"\1", source) + return source diff --git a/tests/unit/extraction/test_leakage_auditor.py b/tests/unit/extraction/test_leakage_auditor.py new file mode 100644 index 0000000..5e56578 --- /dev/null +++ b/tests/unit/extraction/test_leakage_auditor.py @@ -0,0 +1,228 @@ +"""Unit tests for the TargetLeakageAuditor module.""" + +from __future__ import annotations + +import json +from unittest.mock import MagicMock + +import narwhals as nw +import polars as pl +import pytest + +from loclean.extraction.leakage_auditor import TargetLeakageAuditor + +# ------------------------------------------------------------------ +# Helpers +# ------------------------------------------------------------------ + + +def _make_engine(response: str) -> MagicMock: + engine = MagicMock() + engine.generate.return_value = response + return engine + + +def _sample_df() -> pl.DataFrame: + return pl.DataFrame( + { + "age": [25, 30, 45, 50, 35], + "income": [50000, 60000, 80000, 90000, 55000], + "approved_date": [ + "2024-01-15", + "2024-01-20", + "2024-02-01", + "2024-02-10", + "2024-01-25", + ], + "feedback_score": [4, 5, 3, 5, 4], + "approved": [True, True, False, True, True], + } + ) + + +# ------------------------------------------------------------------ +# _extract_state +# ------------------------------------------------------------------ + + +class TestExtractState: + def test_extracts_features_and_samples(self) -> None: + df = _sample_df() + df_nw = nw.from_native(df) + features = ["age", "income", "approved_date", "feedback_score"] + state = TargetLeakageAuditor._extract_state(df_nw, 
"approved", features) + + assert state["target_col"] == "approved" + assert state["features"] == features + assert len(state["sample_rows"]) <= 10 + assert "age" in state["dtypes"] + + def test_respects_sample_n(self) -> None: + df = _sample_df() + df_nw = nw.from_native(df) + state = TargetLeakageAuditor._extract_state( + df_nw, "approved", ["age"], sample_n=2 + ) + assert len(state["sample_rows"]) == 2 + + +# ------------------------------------------------------------------ +# _build_prompt +# ------------------------------------------------------------------ + + +class TestBuildPrompt: + def test_includes_domain_and_target(self) -> None: + state = { + "target_col": "approved", + "features": ["age", "income"], + "dtypes": {"age": "Int64", "income": "Int64"}, + "sample_rows": [{"age": 25, "income": 50000, "approved": True}], + } + prompt = TargetLeakageAuditor._build_prompt(state, "loan approval prediction") + assert "loan approval prediction" in prompt + assert "approved" in prompt + assert "age" in prompt + assert "is_leakage" in prompt + + def test_no_domain(self) -> None: + state = { + "target_col": "y", + "features": ["x"], + "dtypes": {"x": "Float64"}, + "sample_rows": [{"x": 1.0, "y": 0}], + } + prompt = TargetLeakageAuditor._build_prompt(state, "") + assert "Dataset domain:" not in prompt + + +# ------------------------------------------------------------------ +# _parse_verdict +# ------------------------------------------------------------------ + + +class TestParseVerdict: + def test_parses_valid_json(self) -> None: + response = json.dumps( + [ + {"column": "approved_date", "is_leakage": True, "reason": "Post-event"}, + {"column": "age", "is_leakage": False, "reason": "Pre-event"}, + ] + ) + verdicts = TargetLeakageAuditor._parse_verdict(response) + assert len(verdicts) == 2 + assert verdicts[0]["column"] == "approved_date" + assert verdicts[0]["is_leakage"] is True + assert verdicts[1]["is_leakage"] is False + + def test_handles_extra_text(self) -> 
None: + response = ( + 'Analysis:\n[{"column": "x", "is_leakage": false, "reason": "ok"}]\nEnd.' + ) + verdicts = TargetLeakageAuditor._parse_verdict(response) + assert len(verdicts) == 1 + + def test_raises_on_no_json(self) -> None: + with pytest.raises(ValueError, match="No JSON array"): + TargetLeakageAuditor._parse_verdict("no json here") + + +# ------------------------------------------------------------------ +# audit (integration with mock LLM) +# ------------------------------------------------------------------ + + +class TestAudit: + def test_drops_leaked_columns(self) -> None: + df = _sample_df() + response = json.dumps( + [ + {"column": "age", "is_leakage": False, "reason": "ok"}, + {"column": "income", "is_leakage": False, "reason": "ok"}, + {"column": "approved_date", "is_leakage": True, "reason": "Post-event"}, + { + "column": "feedback_score", + "is_leakage": True, + "reason": "Post-event", + }, + ] + ) + engine = _make_engine(response) + auditor = TargetLeakageAuditor(inference_engine=engine) + + pruned, summary = auditor.audit(df, "approved", "loan approval") + + assert "approved_date" not in pruned.columns + assert "feedback_score" not in pruned.columns + assert "age" in pruned.columns + assert "income" in pruned.columns + assert "approved" in pruned.columns + assert "approved_date" in summary["dropped_columns"] + assert "feedback_score" in summary["dropped_columns"] + + def test_keeps_all_if_no_leakage(self) -> None: + df = _sample_df() + response = json.dumps( + [ + {"column": "age", "is_leakage": False, "reason": "ok"}, + {"column": "income", "is_leakage": False, "reason": "ok"}, + {"column": "approved_date", "is_leakage": False, "reason": "ok"}, + {"column": "feedback_score", "is_leakage": False, "reason": "ok"}, + ] + ) + engine = _make_engine(response) + auditor = TargetLeakageAuditor(inference_engine=engine) + + pruned, summary = auditor.audit(df, "approved") + + assert set(pruned.columns) == set(df.columns) + assert 
summary["dropped_columns"] == [] + + def test_missing_target_raises(self) -> None: + df = _sample_df() + engine = _make_engine("[]") + auditor = TargetLeakageAuditor(inference_engine=engine) + + with pytest.raises(ValueError, match="not found"): + auditor.audit(df, "nonexistent") + + def test_no_feature_columns(self) -> None: + df = pl.DataFrame({"target": [1, 2, 3]}) + engine = _make_engine("[]") + auditor = TargetLeakageAuditor(inference_engine=engine) + + pruned, summary = auditor.audit(df, "target") + + assert pruned.columns == ["target"] + assert summary["dropped_columns"] == [] + engine.generate.assert_not_called() + + def test_summary_contains_verdicts(self) -> None: + df = _sample_df() + response = json.dumps( + [ + {"column": "age", "is_leakage": False, "reason": "ok"}, + ] + ) + engine = _make_engine(response) + auditor = TargetLeakageAuditor(inference_engine=engine) + + _, summary = auditor.audit(df, "approved") + + assert "verdicts" in summary + assert isinstance(summary["verdicts"], list) + + def test_domain_passed_to_prompt(self) -> None: + df = _sample_df() + response = json.dumps( + [ + {"column": "age", "is_leakage": False, "reason": "ok"}, + ] + ) + engine = _make_engine(response) + auditor = TargetLeakageAuditor(inference_engine=engine) + + auditor.audit(df, "approved", domain="healthcare readmission") + + call_args = engine.generate.call_args[0][0] + assert "healthcare readmission" in call_args diff --git a/tests/unit/extraction/test_missingness_recognizer.py b/tests/unit/extraction/test_missingness_recognizer.py new file mode 100644 index 0000000..13e44c1 --- /dev/null +++ b/tests/unit/extraction/test_missingness_recognizer.py @@ -0,0 +1,195 @@ +"""Unit tests for the MissingnessRecognizer module.""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import MagicMock + +import narwhals as nw +import polars as pl + +from loclean.extraction.missingness_recognizer import MissingnessRecognizer + +# 
------------------------------------------------------------------ +# Helpers +# ------------------------------------------------------------------ + + +def _make_engine(response: str) -> MagicMock: + engine = MagicMock() + engine.generate.return_value = response + return engine + + +_ENCODER_SRC = ( + "def encode_missingness(row: dict) -> bool:\n" + " try:\n" + " return row.get('category') == 'electronics'\n" + " except Exception:\n" + " return False\n" +) + + +def _df_with_nulls() -> pl.DataFrame: + return pl.DataFrame( + { + "price": [100.0, None, 300.0, None, 500.0, None], + "category": [ + "clothing", + "electronics", + "clothing", + "electronics", + "clothing", + "electronics", + ], + "quantity": [10, 5, 20, 3, 15, 1], + } + ) + + +# ------------------------------------------------------------------ +# _find_null_columns +# ------------------------------------------------------------------ + + +class TestFindNullColumns: + def test_detects_columns_with_nulls(self) -> None: + df = _df_with_nulls() + df_nw = nw.from_native(df) + result = MissingnessRecognizer._find_null_columns(df_nw) + assert result == ["price"] + + def test_no_nulls(self) -> None: + df = pl.DataFrame({"a": [1, 2], "b": [3, 4]}) + df_nw = nw.from_native(df) + result = MissingnessRecognizer._find_null_columns(df_nw) + assert result == [] + + +# ------------------------------------------------------------------ +# _sample_null_context +# ------------------------------------------------------------------ + + +class TestSampleNullContext: + def test_samples_rows_where_target_is_null(self) -> None: + df = _df_with_nulls() + df_nw = nw.from_native(df) + sample = MissingnessRecognizer._sample_null_context( + df_nw, "price", ["category", "quantity"] + ) + assert len(sample) == 3 + for row in sample: + assert "category" in row + assert "quantity" in row + assert "price" not in row + + def test_respects_max_rows(self) -> None: + df = _df_with_nulls() + df_nw = nw.from_native(df) + sample = 
# (tail of the preceding max_rows test — its `def` line falls outside this
#  chunk of the patch and is preserved here only as a comment)
#     sample = MissingnessRecognizer._sample_null_context(
#         df_nw, "price", ["category"], max_rows=2
#     )
#     assert len(sample) == 2


class TestSampleNullContext:
    # NOTE(review): class name reconstructed from the `_sample_null_context`
    # section marker; the class header itself is outside this chunk — confirm.

    def test_empty_when_no_nulls(self) -> None:
        """A column with no nulls yields an empty context sample."""
        df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
        df_nw = nw.from_native(df)
        sample = MissingnessRecognizer._sample_null_context(df_nw, "a", ["b"])
        assert sample == []


# ------------------------------------------------------------------
# _build_prompt
# ------------------------------------------------------------------


class TestBuildPrompt:
    """Prompt construction for the missingness-encoder LLM call."""

    def test_includes_column_name_and_sample(self) -> None:
        prompt = MissingnessRecognizer._build_prompt(
            "price",
            ["category", "quantity"],
            [{"category": "electronics", "quantity": 5}],
        )
        assert "price" in prompt
        assert "encode_missingness" in prompt
        assert "electronics" in prompt

    def test_includes_rules(self) -> None:
        prompt = MissingnessRecognizer._build_prompt("x", ["y"], [{"y": 1}])
        assert "try/except" in prompt
        assert "boolean" in prompt.lower() or "bool" in prompt.lower()


# ------------------------------------------------------------------
# _verify_encoder
# ------------------------------------------------------------------


class TestVerifyEncoder:
    """Static verification of LLM-generated encoder callables."""

    def test_valid_encoder_passes(self) -> None:
        def good_fn(row: dict[str, Any]) -> bool:
            return True

        ok, err = MissingnessRecognizer._verify_encoder(good_fn, [{"a": 1}, {"a": 2}])
        assert ok is True
        assert err == ""

    def test_non_bool_return_fails(self) -> None:
        def bad_fn(row: dict[str, Any]) -> Any:
            return "not a bool"

        ok, err = MissingnessRecognizer._verify_encoder(bad_fn, [{"a": 1}])
        assert ok is False
        assert "bool" in err.lower()


# ------------------------------------------------------------------
# recognize (integration with mock LLM)
# ------------------------------------------------------------------


class TestRecognize:
    """End-to-end recognize() behaviour against a mocked inference engine."""

    def test_adds_mnar_column(self) -> None:
        engine = _make_engine(_ENCODER_SRC)
        recognizer = MissingnessRecognizer(inference_engine=engine, max_retries=1)
        df = _df_with_nulls()
        result, summary = recognizer.recognize(df)

        assert "price_mnar" in result.columns
        assert "price" in summary["patterns"]
        assert summary["patterns"]["price"]["encoded_as"] == "price_mnar"

    def test_no_nulls_skips(self) -> None:
        # No missing values anywhere — the LLM must never be consulted.
        engine = _make_engine("")
        recognizer = MissingnessRecognizer(inference_engine=engine, max_retries=1)
        df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
        result, summary = recognizer.recognize(df)

        assert set(result.columns) == {"a", "b"}
        assert summary["patterns"] == {}
        engine.generate.assert_not_called()

    def test_target_cols_filter(self) -> None:
        # Only columns named in target_cols may appear in the summary, even
        # when other columns also contain nulls.
        df = pl.DataFrame(
            {
                "a": [1, None, 3],
                "b": [None, 2, None],
                "c": [10, 20, 30],
            }
        )
        engine = _make_engine(_ENCODER_SRC)
        recognizer = MissingnessRecognizer(inference_engine=engine, max_retries=1)
        _, summary = recognizer.recognize(df, target_cols=["a"])

        assert "a" in summary["patterns"]
        assert "b" not in summary["patterns"]

    def test_compile_failure_returns_none_pattern(self) -> None:
        # An uncompilable LLM response must not add a column; the pattern is
        # recorded as None rather than raising.
        engine = _make_engine("this is not valid python at all!!!")
        recognizer = MissingnessRecognizer(inference_engine=engine, max_retries=1)
        df = _df_with_nulls()
        result, summary = recognizer.recognize(df)

        assert "price_mnar" not in result.columns
        assert summary["patterns"]["price"] is None


# --- tests/unit/extraction/test_trap_pruner.py (new file in the same patch) ---
# The new module opens with:
#     """Unit tests for the TrapPruner module."""
#     from __future__ import annotations
#     import json
#     from unittest.mock import MagicMock
#     import narwhals as nw
#     import polars as pl
#     import pytest
#     from loclean.extraction.trap_pruner import TrapPruner, _ColumnProfile
# ------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------


def _make_engine(response: str) -> MagicMock:
    """Mock inference engine whose generate() always returns *response*."""
    engine = MagicMock()
    engine.generate.return_value = response
    return engine


def _sample_df() -> pl.DataFrame:
    """DataFrame with one real feature and one Gaussian noise column."""
    import random

    random.seed(42)  # deterministic fixture — corr assertions depend on it
    n = 100
    prices = [200_000 + i * 5000 for i in range(n)]
    sqft = [150 + i * 10 + random.randint(-5, 5) for i in range(n)]
    noise = [random.gauss(0, 1) for _ in range(n)]

    return pl.DataFrame(
        {
            "sqft": sqft,
            "noise_feat": noise,
            "price": prices,
        }
    )


# ------------------------------------------------------------------
# _profile_columns
# ------------------------------------------------------------------


class TestProfileColumns:
    """Per-column statistical profiling used as LLM input."""

    def test_basic_stats(self) -> None:
        df = _sample_df()
        df_nw = nw.from_native(df)
        profiles = TrapPruner._profile_columns(df_nw, "price", ["sqft", "noise_feat"])
        assert len(profiles) == 2

        sqft_p = next(p for p in profiles if p.name == "sqft")
        noise_p = next(p for p in profiles if p.name == "noise_feat")

        # sqft tracks price almost linearly; the noise column should not.
        assert abs(sqft_p.corr_with_target) > 0.5
        assert abs(noise_p.corr_with_target) < 0.2

    def test_zero_variance_column(self) -> None:
        df = pl.DataFrame(
            {
                "constant": [5] * 10,
                "target": list(range(10)),
            }
        )
        df_nw = nw.from_native(df)
        profiles = TrapPruner._profile_columns(df_nw, "target", ["constant"])
        assert len(profiles) == 1
        assert profiles[0].variance == 0.0
        assert profiles[0].corr_with_target == 0.0

    def test_single_row_returns_empty(self) -> None:
        # One row gives no variance to profile — expect an empty result.
        df = pl.DataFrame({"a": [1], "target": [2]})
        df_nw = nw.from_native(df)
        profiles = TrapPruner._profile_columns(df_nw, "target", ["a"])
        assert profiles == []


# ------------------------------------------------------------------
# _build_prompt
# ------------------------------------------------------------------


class TestBuildPrompt:
    """Prompt construction anonymises real column names as col_<i>."""

    def test_anonymises_column_names(self) -> None:
        profiles = [
            _ColumnProfile(
                name="secret_column",
                mean=0.0,
                std=1.0,
                variance=1.0,
                skewness=0.0,
                kurtosis=0.0,
                min_val=-3.0,
                max_val=3.0,
                corr_with_target=0.01,
            ),
        ]
        col_map, prompt = TrapPruner._build_prompt(profiles)

        assert "secret_column" not in prompt
        assert "col_0" in prompt
        assert col_map["col_0"] == "secret_column"

    def test_multiple_columns_indexed(self) -> None:
        profiles = [
            _ColumnProfile(
                name=f"feat_{i}",
                mean=float(i),
                std=1.0,
                variance=1.0,
                skewness=0.0,
                kurtosis=0.0,
                min_val=0.0,
                max_val=10.0,
                corr_with_target=0.5,
            )
            for i in range(3)
        ]
        col_map, prompt = TrapPruner._build_prompt(profiles)
        assert len(col_map) == 3
        assert "col_0" in prompt
        assert "col_1" in prompt
        assert "col_2" in prompt


# ------------------------------------------------------------------
# _parse_verdict
# ------------------------------------------------------------------


class TestParseVerdict:
    """Parsing of the LLM verdict JSON back to real column names."""

    def test_maps_anonymous_to_real(self) -> None:
        col_map = {"col_0": "noise_feat", "col_1": "real_feat"}
        response = json.dumps(
            [
                {"column": "col_0", "is_trap": True, "reason": "Gaussian noise"},
                {"column": "col_1", "is_trap": False, "reason": "Correlated"},
            ]
        )

        verdicts = TrapPruner._parse_verdict(response, col_map)
        assert len(verdicts) == 2
        assert verdicts[0]["column"] == "noise_feat"
        assert verdicts[0]["is_trap"] is True
        assert verdicts[1]["column"] == "real_feat"
        assert verdicts[1]["is_trap"] is False

    def test_handles_extra_text_around_json(self) -> None:
        # LLMs often wrap the JSON in prose; the parser must still find it.
        col_map = {"col_0": "feat_a"}
        response = (
            "Here is the analysis:\n"
            '[{"column": "col_0", "is_trap": false, "reason": "ok"}]\nDone.'
        )

        verdicts = TrapPruner._parse_verdict(response, col_map)
        assert len(verdicts) == 1
        assert verdicts[0]["column"] == "feat_a"

    def test_raises_on_no_json(self) -> None:
        with pytest.raises(ValueError, match="No JSON array"):
            TrapPruner._parse_verdict("no json here", {})


# ------------------------------------------------------------------
# prune (integration with mock LLM)
# ------------------------------------------------------------------


class TestPrune:
    """End-to-end prune() behaviour against a mocked inference engine."""

    def test_removes_trap_columns(self) -> None:
        df = _sample_df()
        response = json.dumps(
            [
                {"column": "col_0", "is_trap": False, "reason": "Correlated"},
                {"column": "col_1", "is_trap": True, "reason": "Gaussian noise"},
            ]
        )
        engine = _make_engine(response)
        pruner = TrapPruner(inference_engine=engine)

        pruned, summary = pruner.prune(df, "price")

        assert "noise_feat" not in pruned.columns
        assert "sqft" in pruned.columns
        assert "price" in pruned.columns
        assert "noise_feat" in summary["dropped_columns"]

    def test_keeps_all_if_no_traps(self) -> None:
        df = _sample_df()
        response = json.dumps(
            [
                {"column": "col_0", "is_trap": False, "reason": "ok"},
                {"column": "col_1", "is_trap": False, "reason": "ok"},
            ]
        )
        engine = _make_engine(response)
        pruner = TrapPruner(inference_engine=engine)

        pruned, summary = pruner.prune(df, "price")

        assert set(pruned.columns) == set(df.columns)
        assert summary["dropped_columns"] == []

    def test_returns_summary_with_verdicts(self) -> None:
        df = _sample_df()
        response = json.dumps(
            [
                {"column": "col_0", "is_trap": False, "reason": "real"},
                {"column": "col_1", "is_trap": True, "reason": "noise"},
            ]
        )
        engine = _make_engine(response)
        pruner = TrapPruner(inference_engine=engine)

        _, summary = pruner.prune(df, "price")

        assert "dropped_columns" in summary
        assert "verdicts" in summary
        assert len(summary["verdicts"]) == 2

    def test_missing_target_raises(self) -> None:
        df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
        engine = _make_engine("[]")
        pruner = TrapPruner(inference_engine=engine)

        with pytest.raises(ValueError, match="not found"):
            pruner.prune(df, "nonexistent")

    def test_no_numeric_columns(self) -> None:
        # With nothing numeric to profile, prune() is a no-op and never
        # consults the LLM.
        df = pl.DataFrame(
            {
                "name": ["alice", "bob"],
                "target": [1, 2],
            }
        )
        engine = _make_engine("[]")
        pruner = TrapPruner(inference_engine=engine)

        pruned, summary = pruner.prune(df, "target")

        assert set(pruned.columns) == set(df.columns)
        assert summary["dropped_columns"] == []
        engine.generate.assert_not_called()


# --- tests/unit/inference/test_model_manager.py (modified in the same patch) ---
# The patch adds `from loclean.inference import model_manager` and an autouse
# fixture on TestEnsureModel:
#
#     @pytest.fixture(autouse=True)
#     def _clear_cache(self) -> None:
#         model_manager._verified_models.clear()
#
# NOTE(review): clearing the private module-level cache between tests prevents
# cross-test leakage from the verified-model memoisation — confirm
# _verified_models is the only cached state in model_manager.

# --- tests/unit/utils/test_sandbox.py (modified in the same patch) ---
# test_import_blocked is rewritten from exercising __import__('os') (which
# compiled and raised NameError at call time) to an `import os` statement now
# rejected at compile time:
#
#     def test_import_blocked(self) -> None:
#         source = "import os\ndef f():\n return os.getcwd()\n"
#         with pytest.raises(ValueError, match="not allowed in the sandbox"):
#             compile_sandboxed(source, "f")
#
# NOTE(review): this now near-duplicates test_import_statement_blocked, and
# the __import__-builtin escape path is no longer covered. Consider keeping a
# __import__ case (presumably still a NameError, if sandbox builtins omit it —
# TODO confirm) alongside the statement-level rejection.
# --- tests/unit/utils/test_sandbox.py (unchanged diff context, cut off here) ---
#     def test_import_statement_blocked(self) -> None:
#         source = "import os\ndef f():\n return os.listdir('.')\n"
#         ...

# --- tests/unit/utils/test_source_sanitizer.py (new file in the same patch) ---
"""Tests for loclean.utils.source_sanitizer module."""

from loclean.utils.source_sanitizer import sanitize_source


class TestStripMarkdownFences:
    """Markdown code fences should be removed."""

    def test_python_fences(self) -> None:
        source = "```python\ndef f():\n return 1\n```"
        result = sanitize_source(source)
        assert "```" not in result
        assert "def f():" in result

    def test_triple_backtick_only(self) -> None:
        source = "```\ndef f():\n return 1\n```"
        result = sanitize_source(source)
        assert "```" not in result
        assert "def f():" in result

    def test_no_fences_passthrough(self) -> None:
        source = "def f():\n return 1"
        assert sanitize_source(source) == source


class TestStripProse:
    """Leading/trailing prose should be removed."""

    def test_leading_explanation(self) -> None:
        source = (
            "Here is the corrected function:\n\n"
            "def generate_features(row):\n"
            " return {'a': row['x'] * 2}\n"
        )
        result = sanitize_source(source)
        assert result.startswith("def generate_features")

    def test_trailing_explanation(self) -> None:
        source = (
            "def f(row):\n"
            " return {'a': 1}\n\n"
            "This function computes a simple feature by multiplying the value."
        )
        result = sanitize_source(source)
        assert "This function computes" not in result
        assert "def f(row):" in result

    def test_import_preserved(self) -> None:
        source = "import math\n\ndef f(row):\n return {'a': math.log(1)}"
        result = sanitize_source(source)
        assert result.startswith("import math")


class TestFixUnicodeOperators:
    """Non-ASCII math operators should be replaced."""

    def test_multiplication(self) -> None:
        source = "def f(row):\n return row['a'] \u00d7 row['b']"
        result = sanitize_source(source)
        assert "\u00d7" not in result
        assert "row['a'] * row['b']" in result

    def test_division(self) -> None:
        source = "def f(row):\n return row['a'] \u00f7 row['b']"
        result = sanitize_source(source)
        # Assert the source operator is gone, not only that "/" appeared —
        # consistent with test_multiplication / test_minus_sign.
        assert "\u00f7" not in result
        assert "/" in result

    def test_minus_sign(self) -> None:
        source = "def f(row):\n return row['a'] \u2212 row['b']"
        result = sanitize_source(source)
        assert "\u2212" not in result
        assert "-" in result

    def test_smart_quotes(self) -> None:
        source = "def f(row):\n return row[\u2018name\u2019]"
        result = sanitize_source(source)
        assert "\u2018" not in result
        assert "\u2019" not in result

    def test_comparison_operators(self) -> None:
        source = "def f(x):\n return x \u2264 10"
        result = sanitize_source(source)
        # As above: verify the replacement happened AND the original is gone.
        assert "\u2264" not in result
        assert "<=" in result


class TestFixNumericLiterals:
    """Invalid numeric literals should be fixed."""

    def test_invalid_binary_digit(self) -> None:
        source = "def f():\n x = 0b2\n return x"
        result = sanitize_source(source)
        assert "0b2" not in result
        assert "2" in result

    def test_trailing_currency(self) -> None:
        source = "def f():\n return 100$"
        result = sanitize_source(source)
        assert "$" not in result
        assert "100" in result

    def test_valid_binary_untouched(self) -> None:
        source = "def f():\n return 0b101"
        result = sanitize_source(source)
        assert "0b101" in result


class TestStripBackticks:
    """Stray inline backticks should be removed."""

    def test_wrapped_expression(self) -> None:
        source = "def f(row):\n return `math.log(row['x'])`"
        result = sanitize_source(source)
        assert "`" not in result
        assert "math.log(row['x'])" in result

    def test_no_backticks_passthrough(self) -> None:
        source = "def f():\n return 42"
        assert sanitize_source(source) == source


class TestEndToEnd:
    """Full pipeline integration tests."""

    def test_complete_cleanup(self) -> None:
        source = (
            "Here is your function:\n\n"
            "```python\n"
            "import math\n\n"
            "def generate_features(row):\n"
            " result = {}\n"
            " result[\u2018log_price\u2019] = math.log(row[\u2018price\u2019])\n"
            " result['ratio'] = row['a'] \u00d7 row['b']\n"
            " return result\n"
            "```\n\n"
            "This function generates two features."
        )
        result = sanitize_source(source)

        assert "```" not in result
        assert "Here is your function" not in result
        assert "This function generates" not in result
        assert "\u2018" not in result
        assert "\u00d7" not in result
        assert "import math" in result
        assert "def generate_features(row):" in result
        assert "math.log" in result
        assert "* row['b']" in result