23 changes: 23 additions & 0 deletions README.md
@@ -113,6 +113,28 @@ print(response.parsed.label)  # "positive"
print(response.parsed.score) # 0.98
```

### Embeddings

```python
# Single text → result.embedding (list[float])
result = client.embed(
model="text-embedding-3-small",
input="Python developer with 5 years experience",
)
print(len(result.embedding)) # e.g. 1536

# Batch → result.embeddings (list[list[float]])
result = client.embed(
model="text-embedding-3-small",
input=["Python developer", "Go engineer", "React developer"],
)
print(len(result.embeddings)) # 3

# Also available via Google and Cohere
result = client.embed(model="gemini-embedding-001", input="Hello")
result = client.embed(model="embed-english-v3.0", input="Hello")
```

### Multi-Provider Switching
```python
# Seamlessly switch between providers
@@ -132,6 +154,7 @@ for model in models:
- **Multi-Provider Support**: Switch between 8 major AI providers seamlessly
- **Automatic Detection**: Auto-detects available API keys from environment
- **Unified Interface**: Consistent API across all providers
- **Embeddings**: Single and batch text embeddings via OpenAI, Google, and Cohere
- **Structured Output**: Parse responses directly into Pydantic models
- **Streaming Support**: Real-time response streaming
- **Function Calling**: Tool integration with decorators
16 changes: 15 additions & 1 deletion docs/index.md
@@ -68,6 +68,21 @@ for chunk in stream:
print(chunk.delta or "", end="", flush=True)
```

**Embeddings:**

```python
# Single text
result = client.embed(model="text-embedding-3-small", input="Python developer")
print(len(result.embedding)) # e.g. 1536

# Batch
result = client.embed(
model="text-embedding-3-small",
input=["Python developer", "Go engineer", "React developer"],
)
print(len(result.embeddings)) # 3
```

**Function Calling:**

```python
@@ -112,7 +127,6 @@ asyncio.run(main())

## Roadmap

- **Embeddings Support**: Unified interface for text embeddings across providers
- **Multi-Modal Support**: Enhanced support for images and audio
- **Cost Tracking**: Built-in usage and cost monitoring
- **Advanced Routing**: Load balancing and failover between providers
32 changes: 29 additions & 3 deletions docs/usage/configuration.md
@@ -75,14 +75,40 @@ client = Chimeric(
)
```

### Timeout
### HTTP Client Options

The `timeout` parameter sets the HTTP request timeout (in seconds) for all providers:
These parameters configure the underlying httpx transport and apply to every provider:

| Parameter | Type | Default | Description |
|---|---|---|---|
| `timeout` | `float` | `60.0` | Request timeout in seconds |
| `max_retries` | `int` | `2` | Retries on connection errors and 5xx responses |
| `default_headers` | `dict[str, str] \| None` | `None` | Extra headers sent with every request |

```python
client = Chimeric(
timeout=120.0, # longer timeout for slow models
max_retries=3, # retry up to 3 times before raising
)

# Set to 0 to disable retries entirely
client = Chimeric(max_retries=0)
```
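The retry semantics can be pictured as a simple loop. This is a rough sketch of the behaviour described above, not Chimeric's actual implementation (the client delegates retries to the underlying provider SDKs), and `ConnectionError` stands in for whatever retryable failures the transport raises:

```python
import time


def with_retries(fn, max_retries: int = 2, base_delay: float = 0.5):
    """Sketch of the retry behaviour: retry on retryable failures,
    with exponential backoff between attempts."""
    last_exc = None
    for attempt in range(max_retries + 1):
        try:
            return fn()
        except ConnectionError as exc:  # stand-in for retryable failures
            last_exc = exc
            if attempt < max_retries:
                time.sleep(base_delay * 2**attempt)
    raise last_exc
```

With `max_retries=2`, a request is attempted at most three times before the last error is raised.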

Use `default_headers` for organisation IDs, tracing headers, or any static metadata your infrastructure requires:

```python
client = Chimeric(timeout=120.0) # default is 60.0
client = Chimeric(
openai_api_key="sk-...",
default_headers={
"OpenAI-Organization": "org-...",
"X-Trace-ID": "my-trace-id",
},
)
```

These parameters mirror the initialisation options of the native OpenAI, Anthropic, and Groq SDKs, so existing client configuration carries over unchanged.

## Provider Routing

### Automatic Model Routing
139 changes: 139 additions & 0 deletions docs/usage/embeddings.md
@@ -0,0 +1,139 @@
# Embeddings

Chimeric provides a unified embedding API across OpenAI, Google, and Cohere through `embed()` and `aembed()`. Pass a single string for one vector or a list of strings for batch embedding — the response shape adjusts automatically.

## Supported Providers

| Provider | Example models |
|---|---|
| OpenAI | `text-embedding-3-small`, `text-embedding-3-large`, `text-embedding-ada-002` |
| Google | `gemini-embedding-001` |
| Cohere | `embed-english-v3.0`, `embed-english-light-v3.0`, `embed-multilingual-v3.0`, `embed-v4.0` |

## Single Embedding

Pass a plain string to get one vector back in `result.embedding`:

```python
from chimeric import Chimeric

client = Chimeric()

result = client.embed(
model="text-embedding-3-small",
input="Python developer with 5 years experience",
)

print(result.embedding) # list[float]
print(result.model) # "text-embedding-3-small"
print(result.usage.prompt_tokens) # int
```

## Batch Embedding

Pass a list of strings to embed multiple texts in a single request. Results arrive in `result.embeddings` in the same order as the input:

```python
texts = [
"Python backend developer",
"Go backend engineer",
"Frontend React developer",
]

result = client.embed(model="text-embedding-3-small", input=texts)

print(len(result.embeddings)) # 3
print(result.embedding) # None for batch requests
```

## EmbeddingResponse Fields

| Field | Type | Description |
|---|---|---|
| `embedding` | `list[float] \| None` | Single embedding vector; `None` for batch requests |
| `embeddings` | `list[list[float]]` | All vectors for batch requests; empty for single input |
| `model` | `str \| None` | Model that produced the response |
| `usage` | `EmbeddingUsage \| None` | Token usage (`prompt_tokens`, `total_tokens`) |
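When downstream code has to handle both shapes, a small normaliser keeps the logic uniform. This helper is a sketch (not part of Chimeric), assuming only the fields listed in the table above:

```python
def vectors(result) -> list[list[float]]:
    """Return all vectors from an EmbeddingResponse, whether it came
    from a single-string or a batch request."""
    if result.embedding is not None:
        return [result.embedding]
    return result.embeddings
```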

## Custom Dimensions (OpenAI)

OpenAI's `text-embedding-3` family supports truncating embeddings to a smaller dimension, useful for reducing storage and latency:

```python
result = client.embed(
model="text-embedding-3-small",
input="Hello world",
dimensions=256,
)

print(len(result.embedding)) # 256
```

## Async Usage

Use `aembed()` in async contexts — it mirrors `embed()` exactly:

```python
import asyncio
from chimeric import Chimeric


async def main():
client = Chimeric()

# Single
result = await client.aembed(
model="text-embedding-3-small",
input="Hello from async",
)
print(len(result.embedding))

# Batch
result = await client.aembed(
model="text-embedding-3-small",
input=["text one", "text two"],
)
print(len(result.embeddings)) # 2


asyncio.run(main())
```
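Because `aembed()` is an ordinary coroutine, many requests can run concurrently with `asyncio.gather`. A sketch, assuming each item in `batches` fits within the provider's per-request limits:

```python
import asyncio


async def embed_many(client, model: str, batches: list[list[str]]):
    """Embed several batches concurrently; results keep input order."""
    tasks = [client.aembed(model=model, input=batch) for batch in batches]
    return await asyncio.gather(*tasks)
```

`gather` preserves input order, so `results[i]` always corresponds to `batches[i]` regardless of which request finishes first.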

## Cross-Provider Switching

The same call pattern works across all supported providers — only the model name changes:

```python
openai_result = client.embed(model="text-embedding-3-small", input="machine learning engineer")
google_result = client.embed(model="gemini-embedding-001", input="machine learning engineer")
cohere_result = client.embed(model="embed-english-v3.0", input="machine learning engineer")
```
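One caveat: vectors from different models live in different spaces, so similarity scores are only meaningful between embeddings produced by the same model. A defensive helper (a sketch; a dimension check catches the obvious mismatch, though equal dimensions alone do not guarantee the same model):

```python
def checked_dot(a: list[float], b: list[float]) -> float:
    """Dot product that refuses silently-truncating comparisons
    between vectors of different lengths (e.g. from different models)."""
    if len(a) != len(b):
        raise ValueError(f"dimension mismatch: {len(a)} vs {len(b)}")
    return sum(x * y for x, y in zip(a, b))
```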

## Cosine Similarity

Embeddings are most useful when compared. Here is a minimal cosine similarity helper:

```python
import math


def cosine_similarity(a: list[float], b: list[float]) -> float:
dot = sum(x * y for x, y in zip(a, b))
norm_a = math.sqrt(sum(x * x for x in a))
norm_b = math.sqrt(sum(x * x for x in b))
return dot / (norm_a * norm_b) if norm_a and norm_b else 0.0


result = client.embed(
model="text-embedding-3-small",
input=[
"Python backend developer",
"Python backend engineer", # similar
"Chocolate cake recipe", # dissimilar
],
)

a, b, c = result.embeddings
print(cosine_similarity(a, b))  # high: near-duplicate phrasing
print(cosine_similarity(a, c))  # much lower: unrelated topic
```
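A common next step is ranking candidates against a query vector. A minimal sketch on hand-made toy vectors (the cosine helper is restated so the block stands alone); in practice the vectors would come from `embed()`:

```python
import math


def cosine(a: list[float], b: list[float]) -> float:
    dot = sum(x * y for x, y in zip(a, b))
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(x * x for x in b))
    return dot / (na * nb) if na and nb else 0.0


def rank(query: list[float], candidates: list[list[float]]) -> list[int]:
    """Indices of candidates, most similar to the query first."""
    scores = [cosine(query, c) for c in candidates]
    return sorted(range(len(candidates)), key=lambda i: scores[i], reverse=True)


# Toy vectors: candidate 1 points along the query, candidate 0 is orthogonal.
query = [1.0, 0.0]
cands = [[0.0, 1.0], [0.9, 0.1]]
print(rank(query, cands))  # [1, 0]
```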
44 changes: 41 additions & 3 deletions docs/usage/responses.md
@@ -62,22 +62,60 @@ summarize("gemini-1.5-pro", "...")

Provider-specific details that don't map to the standard fields are available in `response.metadata`.

## EmbeddingResponse

`embed()` and `aembed()` return an `EmbeddingResponse`:

```python
result = client.embed(model="text-embedding-3-small", input="Hello")

print(result.embedding) # list[float] — single vector
print(result.model) # str | None — model used
print(result.usage.prompt_tokens) # int
print(result.usage.total_tokens) # int
```

For batch input the vectors arrive in `embeddings` and `embedding` is `None`:

```python
result = client.embed(
model="text-embedding-3-small",
input=["first text", "second text"],
)

print(result.embedding) # None
print(result.embeddings) # list[list[float]] — one vector per input
```

| Field | Type | Description |
|---|---|---|
| `embedding` | `list[float] \| None` | Single vector; `None` for batch |
| `embeddings` | `list[list[float]]` | All vectors for batch; empty for single |
| `model` | `str \| None` | Model that produced the response |
| `usage` | `EmbeddingUsage \| None` | `prompt_tokens` and `total_tokens` |

## Async Support

`agenerate()` is the async counterpart with the same return types:
`agenerate()` and `aembed()` are the async counterparts with the same return types:

```python
import asyncio


async def main():
# Non-streaming
# Non-streaming completion
response = await client.agenerate(model="gpt-4o", messages="Hello")
print(response.content)

# Streaming
# Streaming completion
stream = await client.agenerate(model="gpt-4o", messages="Tell a story", stream=True)
async for chunk in stream:
print(chunk.delta or "", end="", flush=True)

# Embedding
result = await client.aembed(model="text-embedding-3-small", input="Hello")
print(len(result.embedding))


asyncio.run(main())
```
1 change: 1 addition & 0 deletions mkdocs.yml
@@ -71,6 +71,7 @@ nav:
- Response Types: usage/responses.md
- Streaming: usage/streaming.md
- Tools: usage/tools.md
- Embeddings: usage/embeddings.md
- Contributing: contributing.md
- Changelog: https://github.com/Verdenroz/chimeric/releases

6 changes: 5 additions & 1 deletion src/chimeric/__init__.py
@@ -13,6 +13,8 @@
)
from .types import (
CompletionResponse,
EmbeddingResponse,
EmbeddingUsage,
Input,
Message,
Metadata,
@@ -30,6 +32,8 @@
"Chimeric",
"ChimericError",
"CompletionResponse",
"EmbeddingResponse",
"EmbeddingUsage",
"Input",
"Message",
"Metadata",
@@ -48,4 +52,4 @@
"Usage",
]

__version__ = "0.2.1"
__version__ = "0.3.0"
3 changes: 2 additions & 1 deletion src/chimeric/adapters/__init__.py
@@ -6,7 +6,7 @@
from __future__ import annotations

from .anthropic import AnthropicAdapter
from .base import Adapter, StreamState
from .base import Adapter, EmbeddingAdapter, StreamState
from .cohere import CohereAdapter
from .google import GoogleAdapter
from .openai import OpenAIAdapter
@@ -16,6 +16,7 @@
"Adapter",
"AnthropicAdapter",
"CohereAdapter",
"EmbeddingAdapter",
"GoogleAdapter",
"OpenAIAdapter",
"StreamState",