fix: resolve mypy type checking issues; replace RAG model_name parameter with temperature

EarthlyAlien · EarthlyAlien · commit beddc20724d0 · 2025-03-17T06:57:07.000+05:30
diff --git a/conftest.py b/conftest.py
@@ -2,23 +2,23 @@
 import pytest
 import tempfile
 from unittest.mock import patch, MagicMock
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional
 import numpy as np
+from langchain.schema import Document, AIMessage
 
 from document_processor import DocumentProcessor
 from vector_store import VectorStore
 from rag import RAG
 
-class MockDocument:
+class MockDocument(Document):
     """Mock LangChain document for testing."""
     
-    def __init__(self, page_content: str, metadata: Dict[str, Any] = None):
-        self.page_content = page_content
-        self.metadata = metadata or {}
+    def __init__(self, page_content: str, metadata: Optional[Dict[str, Any]] = None):
+        super().__init__(page_content=page_content, metadata=metadata or {})
 
 
 @pytest.fixture
-def sample_documents() -> List[MockDocument]:
+def sample_documents() -> List[Document]:
     """Create sample documents for testing."""
     return [
         MockDocument(
@@ -55,7 +55,7 @@ def mock_pdf_file():
 
 
 @pytest.fixture
-def document_processor():
+def document_processor() -> DocumentProcessor:
     """Create a DocumentProcessor instance for testing."""
     return DocumentProcessor(chunk_size=100, chunk_overlap=20)
 
@@ -77,7 +77,7 @@ def mock_sentence_transformer():
 
 
 @pytest.fixture
-def vector_store(mock_sentence_transformer):
+def vector_store(mock_sentence_transformer) -> VectorStore:
     """Create a VectorStore instance with mocked embeddings for testing."""
     return VectorStore()
 
@@ -89,15 +89,14 @@ def mock_openai():
         # Configure the mock response
         instance = MagicMock()
         
-        response = MagicMock()
-        response.content = "This is a mock response from the language model."
+        response = AIMessage(content="This is a mock response from the language model.")
         
         instance.invoke = MagicMock(return_value=response)
         mock.return_value = instance
         yield mock
 
 
 @pytest.fixture
-def rag_instance(vector_store, mock_openai):
+def rag_instance(vector_store, mock_openai) -> RAG:
     """Create a RAG instance with a mocked vector store and language model."""
     return RAG(vector_store) 
diff --git a/document_processor.py b/document_processor.py
@@ -1,7 +1,8 @@
 import os
-from typing import List, Dict, Any
+from typing import List
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.schema import Document
 
 class DocumentProcessor:
     """Handles document loading and chunking."""
@@ -22,7 +23,7 @@ def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200):
             length_function=len,
         )
         
-    def load_pdf(self, file_path: str) -> List[Dict[str, Any]]:
+    def load_pdf(self, file_path: str) -> List[Document]:
         """
         Load a PDF document and split it into chunks.
         
@@ -48,7 +49,7 @@ def load_pdf(self, file_path: str) -> List[Dict[str, Any]]:
         except Exception as e:
             raise Exception(f"Error loading PDF: {str(e)}")
     
-    def chunk_documents(self, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    def chunk_documents(self, documents: List[Document]) -> List[Document]:
         """
         Split documents into smaller chunks.
         
@@ -60,7 +61,7 @@ def chunk_documents(self, documents: List[Dict[str, Any]]) -> List[Dict[str, Any
         """
         return self.text_splitter.split_documents(documents)
     
-    def process_document(self, file_path: str) -> List[Dict[str, Any]]:
+    def process_document(self, file_path: str) -> List[Document]:
         """
         Process a document: load and chunk it.
         
diff --git a/rag.py b/rag.py
@@ -1,24 +1,25 @@
 import os
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional, Union, cast
 from dotenv import load_dotenv
 from langchain_openai import ChatOpenAI
-from langchain.schema import HumanMessage, SystemMessage
+from langchain.schema import HumanMessage, SystemMessage, AIMessage, BaseMessage
+from langchain.schema import Document
 
 load_dotenv()
 
 class RAG:
     """Retrieval Augmented Generation for question answering."""
     
-    def __init__(self, vector_store, model_name: str = "gpt-3.5-turbo"):
+    def __init__(self, vector_store, temperature: float = 0.7):
         """
         Initialize the RAG system.
         
         Args:
             vector_store: Vector store instance
-            model_name: OpenAI model name
+            temperature: Model temperature for response generation
         """
         self.vector_store = vector_store
-        self.model = ChatOpenAI(model_name=model_name)
+        self.model = ChatOpenAI(temperature=temperature)
         
     def generate_answer(self, query: str, k: int = 4) -> str:
         """
@@ -49,6 +50,8 @@ def generate_answer(self, query: str, k: int = 4) -> str:
         # Generate response
         try:
             response = self.model.invoke(messages)
-            return response.content
+            if isinstance(response, BaseMessage):
+                return str(response.content)
+            return str(response)
         except Exception as e:
             return f"Error generating response: {str(e)}" 
diff --git a/vector_store.py b/vector_store.py
@@ -1,7 +1,8 @@
 import os
 import faiss
 import numpy as np
-from typing import List, Dict, Any, Tuple
+from typing import List, Dict, Any, Optional
+from langchain.schema import Document
 from sentence_transformers import SentenceTransformer
 
 class VectorStore:
@@ -18,11 +19,11 @@ def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
             self.model = SentenceTransformer(model_name)
             self.dimension = self.model.get_sentence_embedding_dimension()
             self.index = faiss.IndexFlatL2(self.dimension)
-            self.documents = []
+            self.documents: List[Document] = []
         except Exception as e:
             raise Exception(f"Error initializing vector store: {str(e)}")
         
-    def add_documents(self, documents: List[Dict[str, Any]]) -> None:
+    def add_documents(self, documents: List[Document]) -> None:
         """
         Add documents to the vector store.
         
@@ -45,7 +46,7 @@ def add_documents(self, documents: List[Dict[str, Any]]) -> None:
         except Exception as e:
             raise Exception(f"Error adding documents: {str(e)}")
     
-    def similarity_search(self, query: str, k: int = 4) -> List[Dict[str, Any]]:
+    def similarity_search(self, query: str, k: int = 4) -> List[Document]:
         """
         Perform a similarity search for the query.