Skip to content

Commit 6412aa9

Browse files
authored
Merge pull request #212 from CausalInferenceLab/feat/vector-store-backends
Feat/vector store backends
2 parents 068b70d + a9acce9 commit 6412aa9

File tree

19 files changed

+1085
-16
lines changed

19 files changed

+1085
-16
lines changed

docs/tutorials/vector-store-backends.md

Lines changed: 598 additions & 0 deletions
Large diffs are not rendered by default.

src/lang2sql/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
from .integrations.vectorstore.faiss_ import FAISSVectorStore
2+
from .integrations.vectorstore.pgvector_ import PGVectorStore
13
from .components.execution.sql_executor import SQLExecutor
24
from .components.generation.sql_generator import SQLGenerator
35
from .components.loaders.directory_ import DirectoryLoader
@@ -59,4 +61,7 @@
5961
"Lang2SQLError",
6062
"ComponentError",
6163
"IntegrationMissingError",
64+
# Vector store backends
65+
"FAISSVectorStore",
66+
"PGVectorStore",
6267
]

src/lang2sql/components/loaders/directory_.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import warnings
34
from pathlib import Path
45

56
from ...core.catalog import TextDocument
@@ -53,5 +54,8 @@ def load(self) -> list[TextDocument]:
5354
loader = self._loaders.get(file.suffix.lower())
5455
if loader is None:
5556
continue
56-
docs.extend(loader.load(str(file)))
57+
try:
58+
docs.extend(loader.load(str(file)))
59+
except Exception as e:
60+
warnings.warn(f"Failed to load {file}: {e}", stacklevel=2)
5761
return docs

src/lang2sql/components/loaders/markdown_.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,10 @@
33
from pathlib import Path
44

55
from ...core.catalog import TextDocument
6+
from ...core.ports import DocumentLoaderPort
67

78

8-
class MarkdownLoader:
9+
class MarkdownLoader(DocumentLoaderPort):
910
"""
1011
Markdown file(s) (.md) → list[TextDocument].
1112

src/lang2sql/components/loaders/plaintext_.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,10 @@
33
from pathlib import Path
44

55
from ...core.catalog import TextDocument
6+
from ...core.ports import DocumentLoaderPort
67

78

8-
class PlainTextLoader:
9+
class PlainTextLoader(DocumentLoaderPort):
910
"""
1011
Plain text file(s) (.txt, etc.) → list[TextDocument].
1112

src/lang2sql/components/retrieval/chunker.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ def chunk(self, entry: CatalogEntry) -> list[IndexedChunk]:
9797
return chunks
9898

9999

100-
class RecursiveCharacterChunker:
100+
class RecursiveCharacterChunker(DocumentChunkerPort):
101101
"""
102102
Hierarchical separator-based document chunker. No external dependencies.
103103
@@ -161,6 +161,8 @@ def chunk(self, doc: TextDocument) -> list[IndexedChunk]:
161161

162162
def _split(self, text: str, separators: list[str]) -> list[str]:
163163
"""Recursively try separators until all chunks fit within chunk_size."""
164+
if not separators:
165+
return [text] if text else []
164166
chunks: list[str] = []
165167
separator = separators[-1] # fallback: character-level split
166168

src/lang2sql/core/hooks.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,12 +27,12 @@ class TraceHook(Protocol):
2727
def on_event(self, event: Event) -> None: ...
2828

2929

30-
class NullHook:
30+
class NullHook(TraceHook):
3131
def on_event(self, event: Event) -> None:
3232
return
3333

3434

35-
class MemoryHook:
35+
class MemoryHook(TraceHook):
3636
def __init__(self) -> None:
3737
self.events: list[Event] = []
3838

src/lang2sql/integrations/chunking/semantic_.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,10 @@
33
from ...core.catalog import IndexedChunk, TextDocument
44
from ...core.exceptions import IntegrationMissingError
55
from ...core.ports import EmbeddingPort
6+
from ...components.retrieval.chunker import DocumentChunkerPort
67

78

8-
class SemanticChunker:
9+
class SemanticChunker(DocumentChunkerPort):
910
"""
1011
Embedding-based semantic chunker. Optional — explicit opt-in only.
1112

src/lang2sql/integrations/db/sqlalchemy_.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
from typing import Any
44

55
from ...core.exceptions import IntegrationMissingError
6+
from ...core.ports import DBPort
67

78
try:
89
from sqlalchemy import create_engine, text as sa_text
@@ -13,7 +14,7 @@
1314
Engine = None # type: ignore[assignment,misc]
1415

1516

16-
class SQLAlchemyDB:
17+
class SQLAlchemyDB(DBPort):
1718
"""DBPort implementation backed by SQLAlchemy 2.x."""
1819

1920
def __init__(self, url: str) -> None:

src/lang2sql/integrations/embedding/openai_.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,15 @@
11
from __future__ import annotations
22

33
from ...core.exceptions import IntegrationMissingError
4+
from ...core.ports import EmbeddingPort
45

56
try:
67
import openai as _openai
78
except ImportError:
89
_openai = None # type: ignore[assignment]
910

1011

11-
class OpenAIEmbedding:
12+
class OpenAIEmbedding(EmbeddingPort):
1213
"""EmbeddingPort implementation backed by OpenAI Embeddings API."""
1314

1415
def __init__(

0 commit comments

Comments
 (0)