Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
490 changes: 429 additions & 61 deletions src/loclean/__init__.py

Large diffs are not rendered by default.

53 changes: 53 additions & 0 deletions src/loclean/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ def _init_db(self) -> None:
last_access TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
""")
cursor.execute("""
CREATE TABLE IF NOT EXISTS code_cache (
hash_key TEXT PRIMARY KEY,
source_code TEXT NOT NULL,
last_access TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
""")
self.conn.commit()

def _hash(self, text: str, instruction: str) -> str:
Expand Down Expand Up @@ -149,6 +156,52 @@ def set_batch(
except Exception as e:
logger.error(f"Error writing to cache: {e}")

def get_code(self, key: str) -> Optional[str]:
    """Retrieve cached source code by hash key.

    On a hit the row's ``last_access`` column is refreshed and the write is
    committed, mirroring ``set_code``, so the refresh is not left sitting in
    an open transaction on the connection.

    Args:
        key: SHA256 hash key.

    Returns:
        Source code string if found, ``None`` on miss or when the lookup
        raises (the error is logged, not propagated).
    """
    cursor = self.conn.cursor()
    try:
        cursor.execute(
            "SELECT source_code FROM code_cache WHERE hash_key = ?",
            (key,),
        )
        row = cursor.fetchone()
        if row is None:
            return None
        cursor.execute(
            "UPDATE code_cache SET last_access = CURRENT_TIMESTAMP "
            "WHERE hash_key = ?",
            (key,),
        )
        # Persist the timestamp refresh. On a connection that is not in
        # autocommit mode (e.g. sqlite3's default) the UPDATE opens a
        # transaction that would otherwise stay pending until some later
        # commit.
        self.conn.commit()
        return row[0]  # type: ignore[no-any-return]
    except Exception as e:
        logger.error(f"Error reading code cache: {e}")
        return None

def set_code(self, key: str, source: str) -> None:
    """Persist a source-code string in the ``code_cache`` table.

    A row that already exists for ``key`` is replaced. Any exception raised
    while writing or committing is logged instead of being propagated.

    Args:
        key: SHA256 hash key.
        source: Python source code string.
    """
    upsert_sql = (
        "INSERT OR REPLACE INTO code_cache "
        "(hash_key, source_code) VALUES (?, ?)"
    )
    db_cursor = self.conn.cursor()
    try:
        db_cursor.execute(upsert_sql, (key, source))
        self.conn.commit()
    except Exception as e:
        logger.error(f"Error writing code cache: {e}")

def close(self) -> None:
    """Release the database connection held in ``self.conn``."""
    connection = self.conn
    connection.close()
Expand Down
40 changes: 38 additions & 2 deletions src/loclean/extraction/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,43 @@
"""Extraction module for structured data extraction using Pydantic schemas."""

from __future__ import annotations

from typing import TYPE_CHECKING

from .extract_dataframe import extract_dataframe_compiled
from .extractor import Extractor
from .optimizer import InstructionOptimizer

__all__ = ["Extractor", "InstructionOptimizer", "extract_dataframe_compiled"]
if TYPE_CHECKING:
from .feature_discovery import FeatureDiscovery
from .optimizer import InstructionOptimizer
from .oversampler import SemanticOversampler
from .resolver import EntityResolver
from .shredder import RelationalShredder

# Names exported by this package.
__all__ = [
"EntityResolver",
"Extractor",
"FeatureDiscovery",
"InstructionOptimizer",
"RelationalShredder",
"SemanticOversampler",
"extract_dataframe_compiled",
]

# Maps an exported attribute name to the relative module path that the
# module-level ``__getattr__`` hands to ``importlib.import_module`` when the
# attribute is looked up on this package.
_LAZY_IMPORTS: dict[str, str] = {
"EntityResolver": ".resolver",
"FeatureDiscovery": ".feature_discovery",
"InstructionOptimizer": ".optimizer",
"RelationalShredder": ".shredder",
"SemanticOversampler": ".oversampler",
}


def __getattr__(name: str) -> object:
    """Resolve an attribute that is not yet bound on this module.

    Names registered in ``_LAZY_IMPORTS`` are served by importing the mapped
    relative submodule and returning its attribute of the same name.

    Args:
        name: Attribute being looked up on the module.

    Returns:
        The attribute fetched from the mapped submodule.

    Raises:
        AttributeError: If ``name`` has no entry in ``_LAZY_IMPORTS``.
    """
    relative_module = _LAZY_IMPORTS.get(name)
    if relative_module is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    from importlib import import_module

    return getattr(import_module(relative_module, __name__), name)
Loading