From 60899001ff1709c1fecbebcca08a86457377f610 Mon Sep 17 00:00:00 2001
From: s-github-2 <145811049+s-github-2@users.noreply.github.com>
Date: Sun, 14 Sep 2025 15:38:05 -0500
Subject: [PATCH 1/3] Fix Windows npm location and UTF-8 related errors

---
 database.py   | 692 ++++++++++++++++++++++++++++++++++++++++++++++++++
 run_system.py |  80 ++++--
 2 files changed, 757 insertions(+), 15 deletions(-)
 create mode 100644 database.py

diff --git a/database.py b/database.py
new file mode 100644
index 00000000..043bbbb4
--- /dev/null
+++ b/database.py
@@ -0,0 +1,692 @@
+import sqlite3
+import uuid
+import json
+from datetime import datetime
+from typing import List, Dict, Optional, Tuple
+
+class ChatDatabase:
+    def __init__(self, db_path: str = None):
+        if db_path is None:
+            # Auto-detect environment and set appropriate path
+            import os
+            if os.path.exists("/app"): # Docker environment
+                self.db_path = "/app/backend/chat_data.db"
+            else: # Local development environment
+                self.db_path = "backend/chat_data.db"
+        else:
+            self.db_path = db_path
+        self.init_database()
+
+    def init_database(self):
+        """Initialize the SQLite database with required tables"""
+        conn = sqlite3.connect(self.db_path)
+        cursor = conn.cursor()
+
+        # Enable foreign keys
+        conn.execute("PRAGMA foreign_keys = ON")
+
+        # Sessions table
+        conn.execute('''
+            CREATE TABLE IF NOT EXISTS sessions (
+                id TEXT PRIMARY KEY,
+                title TEXT NOT NULL,
+                created_at TEXT NOT NULL,
+                updated_at TEXT NOT NULL,
+                model_used TEXT NOT NULL,
+                message_count INTEGER DEFAULT 0
+            )
+        ''')
+
+        # Messages table
+        conn.execute('''
+            CREATE TABLE IF NOT EXISTS messages (
+                id TEXT PRIMARY KEY,
+                session_id TEXT NOT NULL,
+                content TEXT NOT NULL,
+                sender TEXT NOT NULL CHECK (sender IN ('user', 'assistant')),
+                timestamp TEXT NOT NULL,
+                metadata TEXT DEFAULT '{}',
+                FOREIGN KEY (session_id) REFERENCES sessions (id) ON DELETE CASCADE
+            )
+        ''')
+
+        # Create indexes for better performance
+        conn.execute('CREATE INDEX IF NOT EXISTS idx_messages_session_id ON messages(session_id)')
+        conn.execute('CREATE INDEX IF NOT EXISTS idx_messages_timestamp ON messages(timestamp)')
+        conn.execute('CREATE INDEX IF NOT EXISTS idx_sessions_updated_at ON sessions(updated_at)')
+
+        # Documents table
+        conn.execute('''
+            CREATE TABLE IF NOT EXISTS session_documents (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                session_id TEXT NOT NULL,
+                file_path TEXT NOT NULL,
+                indexed INTEGER DEFAULT 0,
+                FOREIGN KEY (session_id) REFERENCES sessions (id) ON DELETE CASCADE
+            )
+        ''')
+        conn.execute('CREATE INDEX IF NOT EXISTS idx_session_documents_session_id ON session_documents(session_id)')
+
+        # --- NEW: Index persistence tables ---
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS indexes (
+                id TEXT PRIMARY KEY,
+                name TEXT UNIQUE,
+                description TEXT,
+                created_at TEXT,
+                updated_at TEXT,
+                vector_table_name TEXT,
+                metadata TEXT
+            )
+        ''')
+
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS index_documents (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                index_id TEXT,
+                original_filename TEXT,
+                stored_path TEXT,
+                FOREIGN KEY(index_id) REFERENCES indexes(id)
+            )
+        ''')
+
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS session_indexes (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                session_id TEXT,
+                index_id TEXT,
+                linked_at TEXT,
+                FOREIGN KEY(session_id) REFERENCES sessions(id),
+                FOREIGN KEY(index_id) REFERENCES indexes(id)
+            )
+        ''')
+
+        conn.commit()
+        conn.close()
+        print(" Database initialized successfully") #✅
+
+    def create_session(self, title: str, model: str) -> str:
+        """Create a new chat session"""
+        session_id = str(uuid.uuid4())
+        now = datetime.now().isoformat()
+
+        conn = sqlite3.connect(self.db_path)
+        conn.execute('''
+            INSERT INTO sessions (id, title, created_at, updated_at, model_used)
+            VALUES (?, ?, ?, ?, ?)
+        ''', (session_id, title, now, now, model))
+        conn.commit()
+        conn.close()
+
+        print(f"📝 Created new session: {session_id[:8]}... - {title}")
+        return session_id
+
+    def get_sessions(self, limit: int = 50) -> List[Dict]:
+        """Get all chat sessions, ordered by most recent"""
+        conn = sqlite3.connect(self.db_path)
+        conn.row_factory = sqlite3.Row
+
+        cursor = conn.execute('''
+            SELECT id, title, created_at, updated_at, model_used, message_count
+            FROM sessions
+            ORDER BY updated_at DESC
+            LIMIT ?
+        ''', (limit,))
+
+        sessions = [dict(row) for row in cursor.fetchall()]
+        conn.close()
+
+        return sessions
+
+    def get_session(self, session_id: str) -> Optional[Dict]:
+        """Get a specific session"""
+        conn = sqlite3.connect(self.db_path)
+        conn.row_factory = sqlite3.Row
+
+        cursor = conn.execute('''
+            SELECT id, title, created_at, updated_at, model_used, message_count
+            FROM sessions
+            WHERE id = ?
+        ''', (session_id,))
+
+        row = cursor.fetchone()
+        conn.close()
+
+        return dict(row) if row else None
+
+    def add_message(self, session_id: str, content: str, sender: str, metadata: Dict = None) -> str:
+        """Add a message to a session"""
+        message_id = str(uuid.uuid4())
+        now = datetime.now().isoformat()
+        metadata_json = json.dumps(metadata or {})
+
+        conn = sqlite3.connect(self.db_path)
+
+        # Add the message
+        conn.execute('''
+            INSERT INTO messages (id, session_id, content, sender, timestamp, metadata)
+            VALUES (?, ?, ?, ?, ?, ?)
+        ''', (message_id, session_id, content, sender, now, metadata_json))
+
+        # Update session timestamp and message count
+        conn.execute('''
+            UPDATE sessions
+            SET updated_at = ?,
+                message_count = message_count + 1
+            WHERE id = ?
+        ''', (now, session_id))
+
+        conn.commit()
+        conn.close()
+
+        return message_id
+
+    def get_messages(self, session_id: str, limit: int = 100) -> List[Dict]:
+        """Get all messages for a session"""
+        conn = sqlite3.connect(self.db_path)
+        conn.row_factory = sqlite3.Row
+
+        cursor = conn.execute('''
+            SELECT id, content, sender, timestamp, metadata
+            FROM messages
+            WHERE session_id = ?
+            ORDER BY timestamp ASC
+            LIMIT ?
+        ''', (session_id, limit))
+
+        messages = []
+        for row in cursor.fetchall():
+            message = dict(row)
+            message['metadata'] = json.loads(message['metadata'])
+            messages.append(message)
+
+        conn.close()
+        return messages
+
+    def get_conversation_history(self, session_id: str) -> List[Dict]:
+        """Get conversation history in the format expected by Ollama"""
+        messages = self.get_messages(session_id)
+
+        history = []
+        for msg in messages:
+            history.append({
+                "role": msg["sender"],
+                "content": msg["content"]
+            })
+
+        return history
+
+    def update_session_title(self, session_id: str, title: str):
+        """Update session title"""
+        conn = sqlite3.connect(self.db_path)
+        conn.execute('''
+            UPDATE sessions
+            SET title = ?, updated_at = ?
+            WHERE id = ?
+        ''', (title, datetime.now().isoformat(), session_id))
+        conn.commit()
+        conn.close()
+
+    def delete_session(self, session_id: str) -> bool:
+        """Delete a session and all its messages"""
+        conn = sqlite3.connect(self.db_path)
+        cursor = conn.execute('DELETE FROM sessions WHERE id = ?', (session_id,))
+        deleted = cursor.rowcount > 0
+        conn.commit()
+        conn.close()
+
+        if deleted:
+            print(f"🗑️ Deleted session: {session_id[:8]}...")
+
+        return deleted
+
+    def cleanup_empty_sessions(self) -> int:
+        """Remove sessions with no messages"""
+        conn = sqlite3.connect(self.db_path)
+
+        # Find sessions with no messages
+        cursor = conn.execute('''
+            SELECT s.id FROM sessions s
+            LEFT JOIN messages m ON s.id = m.session_id
+            WHERE m.id IS NULL
+        ''')
+
+        empty_sessions = [row[0] for row in cursor.fetchall()]
+
+        # Delete empty sessions
+        deleted_count = 0
+        for session_id in empty_sessions:
+            cursor = conn.execute('DELETE FROM sessions WHERE id = ?', (session_id,))
+            if cursor.rowcount > 0:
+                deleted_count += 1
+                print(f"🗑️ Cleaned up empty session: {session_id[:8]}...")
+
+        conn.commit()
+        conn.close()
+
+        if deleted_count > 0:
+            print(f"✨ Cleaned up {deleted_count} empty sessions")
+
+        return deleted_count
+
+    def get_stats(self) -> Dict:
+        """Get database statistics"""
+        conn = sqlite3.connect(self.db_path)
+
+        # Get session count
+        cursor = conn.execute('SELECT COUNT(*) FROM sessions')
+        session_count = cursor.fetchone()[0]
+
+        # Get message count
+        cursor = conn.execute('SELECT COUNT(*) FROM messages')
+        message_count = cursor.fetchone()[0]
+
+        # Get most used model
+        cursor = conn.execute('''
+            SELECT model_used, COUNT(*) as count
+            FROM sessions
+            GROUP BY model_used
+            ORDER BY count DESC
+            LIMIT 1
+        ''')
+        most_used_model = cursor.fetchone()
+
+        conn.close()
+
+        return {
+            "total_sessions": session_count,
+            "total_messages": message_count,
+            "most_used_model": most_used_model[0] if most_used_model else None
+        }
+
+    def add_document_to_session(self, session_id: str, file_path: str) -> int:
+        """Adds a document file path to a session."""
+        conn = sqlite3.connect(self.db_path)
+        cursor = conn.execute(
+            "INSERT INTO session_documents (session_id, file_path) VALUES (?, ?)",
+            (session_id, file_path)
+        )
+        doc_id = cursor.lastrowid
+        conn.commit()
+        conn.close()
+        print(f"📄 Added document '{file_path}' to session {session_id[:8]}...")
+        return doc_id
+
+    def get_documents_for_session(self, session_id: str) -> List[str]:
+        """Retrieves all document file paths for a given session."""
+        conn = sqlite3.connect(self.db_path)
+        cursor = conn.execute(
+            "SELECT file_path FROM session_documents WHERE session_id = ?",
+            (session_id,)
+        )
+        paths = [row[0] for row in cursor.fetchall()]
+        conn.close()
+        return paths
+
+    # -------- Index helpers ---------
+
+    def create_index(self, name: str, description: str | None = None, metadata: dict | None = None) -> str:
+        idx_id = str(uuid.uuid4())
+        created = datetime.now().isoformat()
+        vector_table = f"text_pages_{idx_id}"
+        conn = sqlite3.connect(self.db_path)
+        conn.execute('''
+            INSERT INTO indexes (id, name, description, created_at, updated_at, vector_table_name, metadata)
+            VALUES (?,?,?,?,?,?,?)
+        ''', (idx_id, name, description, created, created, vector_table, json.dumps(metadata or {})))
+        conn.commit()
+        conn.close()
+        print(f"📂 Created new index '{name}' ({idx_id[:8]})")
+        return idx_id
+
+    def get_index(self, index_id: str) -> dict | None:
+        conn = sqlite3.connect(self.db_path)
+        conn.row_factory = sqlite3.Row
+        cur = conn.execute('SELECT * FROM indexes WHERE id=?', (index_id,))
+        row = cur.fetchone()
+        if not row:
+            conn.close()
+            return None
+        idx = dict(row)
+        idx['metadata'] = json.loads(idx['metadata'] or '{}')
+        cur = conn.execute('SELECT original_filename, stored_path FROM index_documents WHERE index_id=?', (index_id,))
+        docs = [{'filename': r[0], 'stored_path': r[1]} for r in cur.fetchall()]
+        idx['documents'] = docs
+        conn.close()
+        return idx
+
+    def list_indexes(self) -> list[dict]:
+        conn = sqlite3.connect(self.db_path)
+        conn.row_factory = sqlite3.Row
+        rows = conn.execute('SELECT * FROM indexes').fetchall()
+        res = []
+        for r in rows:
+            item = dict(r)
+            item['metadata'] = json.loads(item['metadata'] or '{}')
+            # attach documents list for convenience
+            docs_cur = conn.execute('SELECT original_filename, stored_path FROM index_documents WHERE index_id=?', (item['id'],))
+            docs = [{'filename': d[0], 'stored_path': d[1]} for d in docs_cur.fetchall()]
+            item['documents'] = docs
+            res.append(item)
+        conn.close()
+        return res
+
+    def add_document_to_index(self, index_id: str, filename: str, stored_path: str):
+        conn = sqlite3.connect(self.db_path)
+        conn.execute('INSERT INTO index_documents (index_id, original_filename, stored_path) VALUES (?,?,?)', (index_id, filename, stored_path))
+        conn.commit()
+        conn.close()
+
+    def link_index_to_session(self, session_id: str, index_id: str):
+        conn = sqlite3.connect(self.db_path)
+        conn.execute('INSERT INTO session_indexes (session_id, index_id, linked_at) VALUES (?,?,?)', (session_id, index_id, datetime.now().isoformat()))
+        conn.commit()
+        conn.close()
+
+    def get_indexes_for_session(self, session_id: str) -> list[str]:
+        conn = sqlite3.connect(self.db_path)
+        cursor = conn.execute('SELECT index_id FROM session_indexes WHERE session_id=? ORDER BY linked_at', (session_id,))
+        ids = [r[0] for r in cursor.fetchall()]
+        conn.close()
+        return ids
+
+    def delete_index(self, index_id: str) -> bool:
+        """Delete an index and its related records (documents, session links). Returns True if deleted."""
+        conn = sqlite3.connect(self.db_path)
+        try:
+            # Get vector table name before deletion (optional, for LanceDB cleanup)
+            cur = conn.execute('SELECT vector_table_name FROM indexes WHERE id = ?', (index_id,))
+            row = cur.fetchone()
+            vector_table_name = row[0] if row else None
+
+            # Remove child rows first due to foreign-key constraints
+            conn.execute('DELETE FROM index_documents WHERE index_id = ?', (index_id,))
+            conn.execute('DELETE FROM session_indexes WHERE index_id = ?', (index_id,))
+            cursor = conn.execute('DELETE FROM indexes WHERE id = ?', (index_id,))
+            deleted = cursor.rowcount > 0
+            conn.commit()
+        finally:
+            conn.close()
+
+        if deleted:
+            print(f"🗑️ Deleted index {index_id[:8]}... and related records")
+            # Optional: attempt to drop LanceDB table if available
+            if vector_table_name:
+                try:
+                    from rag_system.indexing.embedders import LanceDBManager
+                    import os
+                    db_path = os.getenv('LANCEDB_PATH') or './rag_system/index_store/lancedb'
+                    ldb = LanceDBManager(db_path)
+                    db = ldb.db
+                    if hasattr(db, 'table_names') and vector_table_name in db.table_names():
+                        db.drop_table(vector_table_name)
+                        print(f"🚮 Dropped LanceDB table '{vector_table_name}'")
+                except Exception as e:
+                    print(f"⚠️ Could not drop LanceDB table '{vector_table_name}': {e}")
+        return deleted
+
+    def update_index_metadata(self, index_id: str, updates: dict):
+        """Merge new key/values into an index's metadata JSON column."""
+        conn = sqlite3.connect(self.db_path)
+        conn.row_factory = sqlite3.Row
+        cur = conn.execute('SELECT metadata FROM indexes WHERE id=?', (index_id,))
+        row = cur.fetchone()
+        if row is None:
+            conn.close()
+            raise ValueError("Index not found")
+        existing = json.loads(row['metadata'] or '{}')
+        existing.update(updates)
+        conn.execute('UPDATE indexes SET metadata=?, updated_at=? WHERE id=?', (json.dumps(existing), datetime.now().isoformat(), index_id))
+        conn.commit()
+        conn.close()
+
+    def inspect_and_populate_index_metadata(self, index_id: str) -> dict:
+        """
+        Inspect LanceDB table to extract metadata for older indexes.
+        Returns the inferred metadata or empty dict if inspection fails.
+        """
+        try:
+            # Get index info
+            index_info = self.get_index(index_id)
+            if not index_info:
+                return {}
+
+            # Check if metadata is already populated
+            if index_info.get('metadata') and len(index_info['metadata']) > 0:
+                return index_info['metadata']
+
+            # Try to inspect the LanceDB table
+            vector_table_name = index_info.get('vector_table_name')
+            if not vector_table_name:
+                return {}
+
+            try:
+                # Try to import the RAG system modules
+                try:
+                    from rag_system.indexing.embedders import LanceDBManager
+                    import os
+
+                    # Use the same path as the system
+                    db_path = os.getenv('LANCEDB_PATH') or './rag_system/index_store/lancedb'
+                    ldb = LanceDBManager(db_path)
+
+                    # Check if table exists
+                    if not hasattr(ldb.db, 'table_names') or vector_table_name not in ldb.db.table_names():
+                        # Table doesn't exist - this means the index was never properly built
+                        inferred_metadata = {
+                            'status': 'incomplete',
+                            'issue': 'Vector table not found - index may not have been built properly',
+                            'vector_table_expected': vector_table_name,
+                            'available_tables': list(ldb.db.table_names()) if hasattr(ldb.db, 'table_names') else [],
+                            'metadata_inferred_at': datetime.now().isoformat(),
+                            'metadata_source': 'lancedb_inspection'
+                        }
+                        self.update_index_metadata(index_id, inferred_metadata)
+                        print(f"⚠️ Index {index_id[:8]}... appears incomplete - vector table missing")
+                        return inferred_metadata
+
+                    # Get table and inspect schema/data
+                    table = ldb.db.open_table(vector_table_name)
+
+                    # Get a sample record to inspect - use correct LanceDB API
+                    try:
+                        # Try to get sample data using proper LanceDB methods
+                        sample_df = table.to_pandas()
+                        if len(sample_df) == 0:
+                            inferred_metadata = {
+                                'status': 'empty',
+                                'issue': 'Vector table exists but contains no data',
+                                'metadata_inferred_at': datetime.now().isoformat(),
+                                'metadata_source': 'lancedb_inspection'
+                            }
+                            self.update_index_metadata(index_id, inferred_metadata)
+                            return inferred_metadata
+
+                        # Take only first row for inspection
+                        sample_df = sample_df.head(1)
+                    except Exception as e:
+                        print(f"⚠️ Could not read data from table {vector_table_name}: {e}")
+                        return {}
+
+                    # Infer metadata from table structure
+                    inferred_metadata = {
+                        'status': 'functional',
+                        'total_chunks': len(table.to_pandas()), # Get total count
+                    }
+
+                    # Check vector dimensions
+                    if 'vector' in sample_df.columns:
+                        vector_data = sample_df['vector'].iloc[0]
+                        if isinstance(vector_data, list):
+                            inferred_metadata['vector_dimensions'] = len(vector_data)
+
+                            # Try to infer embedding model from vector dimensions
+                            dim_to_model = {
+                                384: 'BAAI/bge-small-en-v1.5 (or similar)',
+                                512: 'sentence-transformers/all-MiniLM-L6-v2 (or similar)',
+                                768: 'BAAI/bge-base-en-v1.5 (or similar)',
+                                1024: 'Qwen/Qwen3-Embedding-0.6B (or similar)',
+                                1536: 'text-embedding-ada-002 (or similar)'
+                            }
+                            if len(vector_data) in dim_to_model:
+                                inferred_metadata['embedding_model_inferred'] = dim_to_model[len(vector_data)]
+
+                    # Try to parse metadata from sample record
+                    if 'metadata' in sample_df.columns:
+                        try:
+                            sample_metadata = json.loads(sample_df['metadata'].iloc[0])
+                            # Look for common metadata fields that might give us clues
+                            if 'document_id' in sample_metadata:
+                                inferred_metadata['has_document_structure'] = True
+                            if 'chunk_index' in sample_metadata:
+                                inferred_metadata['has_chunk_indexing'] = True
+                            if 'original_text' in sample_metadata:
+                                inferred_metadata['has_contextual_enrichment'] = True
+                                inferred_metadata['retrieval_mode_inferred'] = 'hybrid (contextual enrichment detected)'
+
+                            # Check for chunk size patterns
+                            if 'text' in sample_df.columns:
+                                text_length = len(sample_df['text'].iloc[0])
+                                if text_length > 0:
+                                    inferred_metadata['sample_chunk_length'] = text_length
+                                    # Rough chunk size estimation
+                                    estimated_tokens = text_length // 4 # rough estimate: 4 chars per token
+                                    if estimated_tokens < 300:
+                                        inferred_metadata['chunk_size_inferred'] = '256 tokens (estimated)'
+                                    elif estimated_tokens < 600:
+                                        inferred_metadata['chunk_size_inferred'] = '512 tokens (estimated)'
+                                    else:
+                                        inferred_metadata['chunk_size_inferred'] = '1024+ tokens (estimated)'
+
+                        except (json.JSONDecodeError, KeyError):
+                            pass
+
+                    # Check if FTS index exists
+                    try:
+                        indices = table.list_indices()
+                        fts_exists = any('fts' in idx.name.lower() for idx in indices)
+                        if fts_exists:
+                            inferred_metadata['has_fts_index'] = True
+                            inferred_metadata['retrieval_mode_inferred'] = 'hybrid (FTS + vector)'
+                        else:
+                            inferred_metadata['retrieval_mode_inferred'] = 'vector-only'
+                    except:
+                        pass
+
+                    # Add inspection timestamp
+                    inferred_metadata['metadata_inferred_at'] = datetime.now().isoformat()
+                    inferred_metadata['metadata_source'] = 'lancedb_inspection'
+
+                    # Update the database with inferred metadata
+                    if inferred_metadata:
+                        self.update_index_metadata(index_id, inferred_metadata)
+                        print(f"🔍 Inferred metadata for index {index_id[:8]}...: {len(inferred_metadata)} fields")
+
+                    return inferred_metadata
+
+                except ImportError as import_error:
+                    # RAG system modules not available - provide basic fallback metadata
+                    print(f"⚠️ RAG system modules not available for inspection: {import_error}")
+
+                    # Check if this is actually a legacy index by looking at creation date
+                    created_at = index_info.get('created_at', '')
+                    is_recent = False
+                    if created_at:
+                        try:
+                            from datetime import datetime, timedelta
+                            created_date = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
+                            # Consider indexes created in the last 30 days as "recent"
+                            is_recent = created_date > datetime.now().replace(tzinfo=created_date.tzinfo) - timedelta(days=30)
+                        except:
+                            pass
+
+                    # Provide basic fallback metadata with better status detection
+                    if is_recent:
+                        status = 'functional'
+                        issue = 'Detailed configuration inspection requires RAG system modules, but index appears functional'
+                    else:
+                        status = 'legacy'
+                        issue = 'This index was created before metadata tracking was implemented. Configuration details are not available.'
+
+                    fallback_metadata = {
+                        'status': status,
+                        'issue': issue,
+                        'metadata_inferred_at': datetime.now().isoformat(),
+                        'metadata_source': 'fallback_inspection',
+                        'documents_count': len(index_info.get('documents', [])),
+                        'created_at': index_info.get('created_at', 'unknown'),
+                        'inspection_limitation': 'Backend server cannot access full RAG system modules for detailed inspection'
+                    }
+
+                    # Try to infer some basic info from the vector table name
+                    if vector_table_name:
+                        fallback_metadata['vector_table_name'] = vector_table_name
+                        fallback_metadata['note'] = 'Vector table exists but detailed inspection requires RAG system modules'
+
+                    self.update_index_metadata(index_id, fallback_metadata)
+                    status_msg = "recent but limited inspection" if is_recent else "legacy"
+                    print(f"📝 Added fallback metadata for {status_msg} index {index_id[:8]}...")
+                    return fallback_metadata
+
+            except Exception as e:
+                print(f"⚠️ Could not inspect LanceDB table for index {index_id[:8]}...: {e}")
+                return {}
+
+        except Exception as e:
+            print(f"⚠️ Failed to inspect index metadata for {index_id[:8]}...: {e}")
+            return {}
+
+def generate_session_title(first_message: str, max_length: int = 50) -> str:
+    """Generate a session title from the first message"""
+    # Clean up the message
+    title = first_message.strip()
+
+    # Remove common prefixes
+    prefixes = ["hey", "hi", "hello", "can you", "please", "i want", "i need"]
+    title_lower = title.lower()
+    for prefix in prefixes:
+        if title_lower.startswith(prefix):
+            title = title[len(prefix):].strip()
+            break
+
+    # Capitalize first letter
+    if title:
+        title = title[0].upper() + title[1:]
+
+    # Truncate if too long
+    if len(title) > max_length:
+        title = title[:max_length].strip() + "..."
+
+    # Fallback
+    if not title or len(title) < 3:
+        title = "New Chat"
+
+    return title
+
+# Global database instance
+db = ChatDatabase()
+
+if __name__ == "__main__":
+    # Test the database
+    print("🧪 Testing database...")
+
+    # Create a test session
+    session_id = db.create_session("Test Chat", "llama3.2:latest")
+
+    # Add some messages
+    db.add_message(session_id, "Hello!", "user")
+    db.add_message(session_id, "Hi there! How can I help you?", "assistant")
+
+    # Get messages
+    messages = db.get_messages(session_id)
+    print(f"📨 Messages: {len(messages)}")
+
+    # Get sessions
+    sessions = db.get_sessions()
+    print(f"📋 Sessions: {len(sessions)}")
+
+    # Get stats
+    stats = db.get_stats()
+    print(f"📊 Stats: {stats}")
+
+    print("✅ Database test completed!")
\ No newline at end of file

diff --git a/run_system.py b/run_system.py
index 8064d6be..cb86ac3b 100644
--- a/run_system.py
+++ b/run_system.py
@@ -35,7 +35,8 @@
 import logging
 from dataclasses import dataclass
 import psutil
-
+import io # NEW
+import shutil # NEW
 @dataclass
 class ServiceConfig:
     name: str
@@ -104,24 +105,49 @@ def __init__(self, mode: str = "dev", logs_dir: str = "logs"):
         signal.signal(signal.SIGINT, self._signal_handler)
         signal.signal(signal.SIGTERM, self._signal_handler)
 
+    # def setup_logging(self):
+    #     """Setup centralized logging with colors."""
+    #     # Create main logger
+    #     self.logger = logging.getLogger('system')
+    #     self.logger.setLevel(logging.INFO)
+
+    #     # Console handler with colors
+    #     console_handler = logging.StreamHandler(sys.stdout)
+    #     console_handler.setFormatter(ColoredFormatter())
+    #     self.logger.addHandler(console_handler)
+
+    #     # File handler for system logs
+    #     #file_handler = logging.FileHandler(self.logs_dir / 'system.log')
+    #     file_handler = logging.FileHandler(self.logs_dir / 'system.log', encoding='utf-8')
+
+    #     file_handler.setFormatter(logging.Formatter(
+    #         '%(asctime)s [%(levelname)s] %(message)s'
+    #     ))
+    #     self.logger.addHandler(file_handler)
+
     def setup_logging(self):
         """Setup centralized logging with colors."""
         # Create main logger
         self.logger = logging.getLogger('system')
         self.logger.setLevel(logging.INFO)
-
-        # Console handler with colors
+
+        # Remove any existing handlers to avoid duplicates / unexpected handlers
+        if self.logger.handlers:
+            for h in list(self.logger.handlers):
+                self.logger.removeHandler(h)
+
+        # Console handler -> use sys.stdout (reconfigured to utf-8 in __main__)
         console_handler = logging.StreamHandler(sys.stdout)
         console_handler.setFormatter(ColoredFormatter())
         self.logger.addHandler(console_handler)
-
-        # File handler for system logs
-        file_handler = logging.FileHandler(self.logs_dir / 'system.log')
+
+        # File handler for system logs (force UTF-8)
+        file_handler = logging.FileHandler(self.logs_dir / 'system.log', encoding='utf-8')
         file_handler.setFormatter(logging.Formatter(
             '%(asctime)s [%(levelname)s] %(message)s'
         ))
         self.logger.addHandler(file_handler)
-
+
     def _get_service_configs(self) -> Dict[str, ServiceConfig]:
         """Define service configurations based on mode."""
@@ -210,15 +236,30 @@ def check_prerequisites(self) -> bool:
         self.logger.info("✅ All prerequisites satisfied")
         return True
 
+    # def _command_exists(self, command: str) -> bool:
+    #     """Check if a command exists in PATH."""
+    #     try:
+    #         subprocess.run([command, '--version'],
+    #                        capture_output=True, check=True, timeout=5)
+    #         return True
+    #     except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError):
+    #         return False
+
     def _command_exists(self, command: str) -> bool:
-        """Check if a command exists in PATH."""
+        """Check if a command exists in PATH (robust on Windows)."""
+        import shutil
+        # Fast path: is there an executable on PATH?
+        if shutil.which(command):
+            return True
+
+        # Fallback: try running "<command> --version" - allow shell for .cmd shims
         try:
-            subprocess.run([command, '--version'],
-                           capture_output=True, check=True, timeout=5)
+            subprocess.run(f"{command} --version",
+                           capture_output=True, check=True, timeout=5, shell=True)
             return True
-        except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError):
+        except Exception:
             return False
-
+
     def ensure_models(self):
         """Ensure required Ollama models are available."""
         self.logger.info("📥 Checking required models...")
@@ -263,10 +304,17 @@ def start_service(self, service_name: str, config: ServiceConfig) -> bool:
         env = os.environ.copy()
         if config.env:
             env.update(config.env)
-
+        self.logger.info(f"Command: {' '.join(config.command)}")
+        # Resolve executable on Windows (handle npm -> npm.cmd shims)
+        cmd = list(config.command)
+        if os.name == 'nt' and cmd:
+            resolved = shutil.which(cmd[0])
+            if resolved:
+                cmd[0] = resolved
+
         # Start process
         process = subprocess.Popen(
-            config.command,
+            cmd, #config.command,
             cwd=config.cwd,
             env=env,
             stdout=subprocess.PIPE,
@@ -308,7 +356,7 @@ def _monitor_service_logs(self, service_name: str, process: subprocess.Popen):
         service_logger.setLevel(logging.INFO)
 
         # Add file handler for this service
-        file_handler = logging.FileHandler(self.logs_dir / f'{service_name}.log')
+        file_handler = logging.FileHandler(self.logs_dir / f'{service_name}.log',encoding='utf-8')
         file_handler.setFormatter(logging.Formatter('%(asctime)s %(message)s'))
         service_logger.addHandler(file_handler)
@@ -538,4 +586,6 @@ def main():
         manager.shutdown()
 
 if __name__ == "__main__":
+    import sys # NEW
+    sys.stdout.reconfigure(encoding="utf-8") # NEW
     main()
\ No newline at end of file

From fe10e4c3f2d9d410c706dcf164c54f8c547be765 Mon Sep 17 00:00:00 2001
From: s-github-2 <145811049+s-github-2@users.noreply.github.com>
Date: Sun, 14 Sep 2025 15:55:13 -0500
Subject: [PATCH 2/3] Put the updated database.py in the correct folder

---
 .gitignore          |   2 +
 backend/database.py |   2 +-
 database.py         | 692 --------------------------------------------
 3 files changed, 3 insertions(+), 693 deletions(-)
 delete mode 100644 database.py

diff --git a/.gitignore b/.gitignore
index b3358283..2a7dccf9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -76,3 +76,5 @@ rag_system/documents/
 
 !docker.env
 !backend/chat_data.db
+create_feature_branch_localGPT1.ipynb
+.gitignore

diff --git a/backend/database.py b/backend/database.py
index a5d38aec..043bbbb4 100644
--- a/backend/database.py
+++ b/backend/database.py
@@ -103,7 +103,7 @@ def init_database(self):
 
         conn.commit()
         conn.close()
-        print("✅ Database initialized successfully")
+        print(" Database initialized successfully") #✅
 
     def create_session(self, title: str, model: str) -> str:
         """Create a new chat session"""

diff --git a/database.py b/database.py
deleted file mode 100644
index 043bbbb4..00000000
--- a/database.py
+++ /dev/null
@@ -1,692 +0,0 @@
From d5f474f7d22ab5fe7557c757ffb994c625fc1e9d Mon Sep 17 00:00:00 2001
From: s-github-2 <145811049+s-github-2@users.noreply.github.com>
Date: Sun, 14 Sep 2025 16:00:46 -0500
Subject: [PATCH 3/3] Undo .gitignore changes

---
 .gitignore | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 2a7dccf9..b3358283 100644
--- a/.gitignore
+++ b/.gitignore
@@ -76,5 +76,3 @@ rag_system/documents/
 
 !docker.env
 !backend/chat_data.db
-create_feature_branch_localGPT1.ipynb
-.gitignore