From 1e0139b18634aeed59804f771bd8d1b81a23988f Mon Sep 17 00:00:00 2001
From: Bob Hosseini <bbkhosseini@gmail.com>
Date: Thu, 3 Jul 2025 20:31:23 +0200
Subject: [PATCH 1/4] modal app running

---
 .streamlit/config.toml   |   8 +-
 modal/modal_app.py       |  74 +++++++++
 modal/modal_streamlit.py | 330 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 408 insertions(+), 4 deletions(-)
 create mode 100644 modal/modal_app.py
 create mode 100644 modal/modal_streamlit.py

diff --git a/.streamlit/config.toml b/.streamlit/config.toml
index 85695bb..55a91d6 100644
--- a/.streamlit/config.toml
+++ b/.streamlit/config.toml
@@ -1,6 +1,9 @@
 # ===== These configurtions are for the cloud deployment through streamlit cloud
 # Optimized configuration for Streamlit Cloud performance
 [server]
+headless = true
+port = 8501
+
 # Disable file watching (major performance gain in cloud)
 fileWatcherType = "none"
 
@@ -12,14 +15,11 @@ runOnSave = false
 # Memory and connection optimization
 maxUploadSize = 10
 maxMessageSize = 200
-headless = true
+
 
 # WebSocket settings for better responsiveness
 enableWebsocketCompression = true
 
-# Session management
-cookieSecret = "your-secret-key-here"
-
 [browser]
 # Disable telemetry and stats collection
 gatherUsageStats = false
diff --git a/modal/modal_app.py b/modal/modal_app.py
new file mode 100644
index 0000000..0dc0d19
--- /dev/null
+++ b/modal/modal_app.py
@@ -0,0 +1,74 @@
+import shlex
+import subprocess
+from pathlib import Path
+
+import modal
+
+parent_dir = Path(__file__).parent
+project_root = parent_dir.parent
+streamlit_script_local_path = parent_dir / "modal_streamlit.py"
+streamlit_script_remote_path = "/root/modal_streamlit.py"
+
+image = (
+    modal.Image.debian_slim(python_version="3.12")
+    .pip_install(
+        "chromadb>=1.0.0",
+        "langchain==0.3.25",
+        "langchain-chroma>=0.2.4",
+        "langchain-community>=0.3.25",
+        "langchain-groq>=0.3.2",
+        "langchain-huggingface==0.3.0",
+        "langchain-openai>=0.3.22",
+        "numpy>=2.0.0",
+        "omegaconf>=2.3.0",
+        "pandas>=2.3.0",
+        "protobuf==5.29.5",
+        "pydantic>=2.0.0",
+        "pypdf>=5.6.0",
+        "python-dotenv>=1.0.0",
+        "pysqlite3-binary>=0.5.2",
+        "rank-bm25>=0.2.2",
+        "sentence-transformers>=4.1.0",
+        "streamlit>=1.46.0",
+        "scikit-learn>=1.7.0",
+        )
+    .env({  
+        "DEPLOYMENT_MODE": "cloud",
+        "IN_MEMORY": "true", 
+        "DEBUG_MODE": "false",
+        "PYTHONPATH": "/root"
+    })
+    .add_local_file(
+        streamlit_script_local_path,
+        streamlit_script_remote_path,
+    )
+    .add_local_dir(project_root / "frontend", "/root/frontend")
+    .add_local_dir(project_root / "data/sample_pdfs", "/root/data/sample_pdfs")
+    .add_local_dir(project_root / "backend", "/root/backend")
+    .add_local_dir(project_root / "configs", "/root/configs")
+    .add_local_dir(project_root / ".streamlit", "/root/.streamlit")
+    .add_local_file(project_root / "frontend/static/image.jpeg", "/root/frontend/static/image.jpeg")
+)
+
+app = modal.App(name="two-stage-conrag", image=image)
+
+if not streamlit_script_local_path.exists():
+    raise RuntimeError(
+        "modal_streamlit.py not found! Place the script with your streamlit app in the same directory."
+    )
+
+@app.function(
+        # gpu="T4:1",
+        secrets=[modal.Secret.from_name("groq-secret")]
+        )
+@modal.concurrent(max_inputs=100)
+@modal.web_server(8000)
+def run():
+    target = shlex.quote(streamlit_script_remote_path)
+    cmd = f"""streamlit run {target} \
+        --server.port 8000 \
+        --server.enableCORS=false \
+        --server.enableXsrfProtection=false \
+        --server.headless=true"""
+    subprocess.Popen(cmd, shell=True)
+
diff --git a/modal/modal_streamlit.py b/modal/modal_streamlit.py
new file mode 100644
index 0000000..877e53f
--- /dev/null
+++ b/modal/modal_streamlit.py
@@ -0,0 +1,330 @@
+# ---
+# lambda-test: false  # auxiliary-file
+# ---
+# ## Demo Streamlit application.
+#
+# This application is the example from https://docs.streamlit.io/library/get-started/create-an-app.
+#
+# Streamlit is designed to run its apps as Python scripts, not functions, so we separate the Streamlit
+# code into this module, away from the Modal application code.
+
+
+def main():
+    import numpy as np
+    import pandas as pd
+    import streamlit as st
+    import os
+    from pathlib import Path
+    from omegaconf import OmegaConf
+    import sys
+    # from PIL import Image
+    # sys.path.append('/root/frontend')  # Add this line
+    from backend.my_lib.pdf_manager import PDFManager
+    from backend.my_lib.retrievers import Retrievers
+    from backend.my_lib.qa_chains import QAchains
+    from backend.settings import load_and_validate_env_secrets
+    from backend.my_lib.LLMManager import LLMManager
+    from frontend.helper_gui import (
+        question_input_output_ui,
+        display_results_ui,
+        pdf_uploader_ui,
+        select_model_ui,
+        get_in_memory_mode,
+        get_deployment_mode,
+    )
+
+    # ====================================
+    # Initialize and clear problematic state on startup
+    # ====================================
+    if 'app_initialized' not in st.session_state:
+        # Clear any media-related session state
+        for key in list(st.session_state.keys()):
+            if any(word in key.lower() for word in ['file', 'upload', 'media', 'image']):
+                del st.session_state[key]
+        
+        # Clear all cached data to prevent 404 errors
+        if hasattr(st, 'cache_data'):
+            st.cache_data.clear()
+        if hasattr(st, 'cache_resource'):
+            st.cache_resource.clear()
+        
+        st.session_state.app_initialized = True
+        st.rerun()  # Single page reload for all cleanup    
+
+    @st.cache_resource
+    def vector_store_builder(
+        pdf_path: str, _config: OmegaConf, uploaded: list | None
+    ) -> tuple[PDFManager, Retrievers]:
+        """
+        Process the uploaded PDF documents: load, chunk, and create a vector store.
+
+        Args:
+            pdf_path (str): Path to the folder containing PDF files.
+            config (OmegaConf): Configuration object.
+        """
+
+        logger.info("Building vector store for PDFs at path: %s", pdf_path)
+
+        # Step 1: Load and chunk
+        pdf_manager = PDFManager(pdf_path, _config)
+        pdf_manager.load_pdfs()
+        pdf_manager.chunk_documents()
+
+        # Step 2: Create vector store
+        pdf_manager.create_vectorstore()
+
+        # Step 3: Create retrievers
+        retrievers = Retrievers(pdf_manager, _config)
+        retrievers.setup_retrievers()
+
+        logger.info("Vector store and retrievers created successfully.")
+        return pdf_manager, retrievers
+
+    # logging from backend
+    import logging
+
+    logger = logging.getLogger(__name__)
+
+    # ====================================
+    # Initialize Streamlit session state variables
+    # ====================================
+    def initialize_session_state() -> None:
+        """
+        Initialize necessary session state variables for Streamlit.
+        """
+        # Set 'debug' based on env var, but store it in session_state immediately
+        st.session_state.setdefault(
+            "debug", os.getenv("DEBUG_MODE", "false").lower() == "true"
+        )
+        st.session_state.setdefault("pdf_manager", None)
+        st.session_state.setdefault("retrievers", None)
+        st.session_state.setdefault("qa_chains", None)
+        st.session_state.setdefault("answer", "")
+        st.session_state.setdefault("qa_history", [])
+        st.session_state.setdefault("selected_model", None)
+        st.session_state.setdefault("llm_manager", None)
+        st.session_state.setdefault("model_changed", False)
+        st.session_state.setdefault("verbose", False)
+        st.session_state.setdefault("api_key", None)
+        # logger.debug("Session state initialized.")
+    
+    # Initialize session state variables
+    initialize_session_state()
+    logger.debug("Session state initialized.")
+
+    # Display the image at the top of the app
+    image_path = "/root/frontend/static/image.jpeg"
+    try:
+        # os.chdir("/root")  # Set working directory        
+        # if os.path.exists(image_path):
+        st.image(image_path, use_container_width=True)
+            # st.write(f"Image found at: {image_path}")
+        # else:
+            # st.write(f"Image not found at: {image_path}")
+    except Exception as e:
+        # logger.error(f"Error displaying image: {e}")
+        st.write(f"Error displaying image: {e}")
+
+    # Load configuration using OmegaConf
+    config = OmegaConf.load("configs/config.yaml")
+
+   # ==============================
+    # Constructing the Layout
+    # ==============================
+    st.title("Two-Stage RAG System for PDF Question Answering")
+    # st.subheader("Fast yet Precise Document Retrieval and Question Answering")
+    st.write(
+        "Start by **selecting a model** (OpenAI or Open Models) from **left sidebar**, then **upload your PDF files**, and finally **ask questions** to extract insights using the two-stage retrieval system."
+    )
+
+    # sidebar
+    st.sidebar.header("App Description")
+    st.sidebar.write(
+        "This application uses a two-stage retrieval-augmented generation (RAG) pipeline to efficiently extract information from PDF documents. "
+        "It combines lexical retrieval (BM25) with semantic retrieval (vector embeddings) in two consecutive stages."
+        "Upload your PDFs and ask questions to receive precise answers powered by either OpenAI's advanced models or free open-source models via Groq API (or llama-cpp-python in local deployment). "
+    )
+    # Show deployment mode
+    deployment_mode = os.getenv("DEPLOYMENT_MODE", "local")
+    deployment_emoji = "🏠" if deployment_mode == "local" else "☁️"
+    st.sidebar.info(
+        f"{deployment_emoji} **Deployment Mode:** {deployment_mode.title()}"
+    )
+    st.sidebar.info(
+        # f"""📊 **Storage Mode:** {get_in_memory_mode()}
+        # {get_in_memory_mode() == True}
+        # {get_in_memory_mode() == "true"}
+        # {bool(get_in_memory_mode())==True}
+        # """
+        f"📊 **Storage Mode:** {'In-Memory' if os.getenv('IN_MEMORY', 'false').lower() == 'true' else 'Persistent'}"
+    )
+
+    # Check verbose mode
+    if config.settings.verbose:
+        st.session_state.verbose = True
+        st.warning("Verbose mode is enabled.")
+
+    # Clear the vector store if needed
+    if st.session_state.verbose:
+        print(
+            "vector_store_cleared:", st.session_state.get("vector_store_cleared", False)
+        )
+    if (
+        not st.session_state.get("vector_store_cleared", False)
+        and config.Vectorstore.clear_existing
+    ):
+        shutil.rmtree(config.Vectorstore.persist_directory, ignore_errors=True)
+        # rebuild the vector store
+        st.session_state.vector_store_cleared = True
+
+    # Check debug mode
+    if st.session_state.debug:
+        st.warning("DEBUG MODE is ON")
+        logger.debug("Debug mode is enabled.")
+
+    # ==============================
+    # Model Selection
+    # ==============================
+    selected_model = select_model_ui(config)
+
+    if not selected_model:
+        st.stop()
+
+    # Check if model has changed
+    model_changed = (
+        st.session_state.selected_model is None
+        or st.session_state.selected_model.get("model_id")
+        != selected_model.get("model_id")
+        or st.session_state.selected_model.get("provider")
+        != selected_model.get("provider")
+    )
+
+    if model_changed:
+        st.session_state.model_changed = True
+        st.session_state.selected_model = selected_model
+        # Clear existing LLM manager and QA chains when model changes
+        st.session_state.llm_manager = None
+        st.session_state.qa_chains = None
+        st.session_state.answer = None
+
+        if st.session_state.verbose:
+            st.info(f"Model changed to: {selected_model['name']}")
+
+
+    # Initialize LLM Manager based on selected model
+    if st.session_state.llm_manager is None or model_changed:    
+        # OpenAI or Groq models
+        api_key = selected_model.get("api_key")
+        llm_manager = LLMManager(selected_model, api_key)
+
+        st.session_state.llm_manager = llm_manager
+        st.session_state.model_changed = False
+
+    # Get the current llm_manager from session state
+    llm_manager = st.session_state.llm_manager
+
+    if st.session_state.verbose:
+        print("====== Current llm choice and llm_manager:", selected_model, llm_manager)
+
+    # ==============================
+    # PDF Upload and vector store creation
+    # ==============================
+    uploaded, pdf_path = pdf_uploader_ui()
+    if uploaded is not None:
+        logger.info("PDF path provided: %s", pdf_path)
+        if st.session_state.debug:
+            st.write("pdfs path:", pdf_path)
+
+        # CLEAR ANSWER WHEN PROCESSING NEW PDFs
+        st.session_state.answer = None
+
+        # Build vector store
+        pdf_manager, retrievers = vector_store_builder(pdf_path, config, uploaded)
+        st.session_state.pdf_manager = pdf_manager
+        st.session_state.retrievers = retrievers
+
+        # Create QA chains with current LLM manager
+        st.session_state.qa_chains = QAchains(retrievers, config, llm_manager)
+        st.success("PDFs and vector store processed successfully!")
+
+    # Always ensure QA chains exist if we have retrievers and LLM manager
+    if (
+        st.session_state.get("retrievers") is not None
+        and st.session_state.get("llm_manager") is not None
+        and st.session_state.get("qa_chains") is None
+    ):
+
+        st.session_state.qa_chains = QAchains(
+            st.session_state.retrievers, config, st.session_state.llm_manager
+        )
+        st.info("QA system initialized with selected model!")
+
+    
+    # ==============================
+    # Question Section (only if retriever is successfully created)
+    # ==============================
+    if st.session_state.get("retrievers") is not None:
+        question, answer = question_input_output_ui(st.session_state.qa_chains)
+
+        if answer is not None:
+            st.session_state.answer = answer
+            # Store question, answer, and model info
+            model_info = f"{selected_model['name']} ({selected_model['provider']})"
+            st.session_state.qa_history.append((question, answer, model_info))
+            logger.info(
+                "Question answered: %s, answer: %s, model: %s",
+                question,
+                answer,
+                model_info,
+            )
+
+    # ==============================
+    # Display answer & history
+    # ==============================
+    display_results_ui(
+        answer=st.session_state.answer,
+        qa_history=st.session_state.qa_history,
+    )
+    logger.debug("Displayed results and history.")    
+    
+    # DATE_COLUMN = "date/time"
+    # DATA_URL = (
+    #     "https://s3-us-west-2.amazonaws.com/"
+    #     "streamlit-demo-data/uber-raw-data-sep14.csv.gz"
+    # )
+
+    # @st.cache_data
+    # def load_data(nrows):
+    #     data = pd.read_csv(DATA_URL, nrows=nrows)
+
+    #     def lowercase(x):
+    #         return str(x).lower()
+
+    #     data.rename(lowercase, axis="columns", inplace=True)
+    #     data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN])
+    #     return data
+
+
+
+    # data_load_state = st.text("Loading data...")
+    # data = load_data(10000)
+    # data_load_state.text("Done! (using st.cache_data)")
+
+    # if st.checkbox("Show raw data"):
+    #     st.subheader("Raw data")
+    #     st.write(data)
+
+    # st.subheader("Number of pickups by hour")
+    # hist_values = np.histogram(data[DATE_COLUMN].dt.hour, bins=24, range=(0, 24))[0]
+    # st.bar_chart(hist_values)
+
+    # # Some number in the range 0-23
+    # hour_to_filter = st.slider("hour", 0, 23, 17)
+    # filtered_data = data[data[DATE_COLUMN].dt.hour == hour_to_filter]
+
+    # st.subheader("Map of all pickups at %s:00" % hour_to_filter)
+    # st.map(filtered_data)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From 434d97b27954de2eedc9d705ccf979f5e2dc13da Mon Sep 17 00:00:00 2001
From: Bob Hosseini <bbkhosseini@gmail.com>
Date: Thu, 3 Jul 2025 20:40:25 +0200
Subject: [PATCH 2/4] sidebar wider

---
 .streamlit/config.toml   |  4 ++++
 modal/modal_streamlit.py | 12 ++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/.streamlit/config.toml b/.streamlit/config.toml
index 55a91d6..09f3037 100644
--- a/.streamlit/config.toml
+++ b/.streamlit/config.toml
@@ -40,6 +40,10 @@ magicEnabled = false
 # Memory optimization - keep this as false to avoid serialization errors
 enforceSerializableSessionState = false
 
+[ui]
+# Sidebar width configuration (default is 21rem, increase for wider sidebar)
+sidebarWidth = 100
+
 [theme]
 # Clean theme for better perceived performance
 primaryColor = "#1f77b4"
diff --git a/modal/modal_streamlit.py b/modal/modal_streamlit.py
index 877e53f..0c2aca9 100644
--- a/modal/modal_streamlit.py
+++ b/modal/modal_streamlit.py
@@ -33,6 +33,18 @@ def main():
         get_deployment_mode,
     )
 
+    # Adjust sidebar width
+    st.markdown(
+        """
+        <style>
+            section[data-testid="stSidebar"] {
+                width: 400px !important; /* Adjust this value as needed */
+            }
+        </style>
+        """,
+        unsafe_allow_html=True,
+    )
+
     # ====================================
     # Initialize and clear problematic state on startup
     # ====================================

From 638a7fe699d1f5f584a0d6ec4e69b1c4ec931c0b Mon Sep 17 00:00:00 2001
From: Bob Hosseini <bbkhosseini@gmail.com>
Date: Fri, 4 Jul 2025 01:44:03 +0200
Subject: [PATCH 3/4] modal gpu

---
 .streamlit/config.toml | 19 +++++++++++++------
 modal/modal_app.py     |  2 +-
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/.streamlit/config.toml b/.streamlit/config.toml
index 09f3037..ae28124 100644
--- a/.streamlit/config.toml
+++ b/.streamlit/config.toml
@@ -44,10 +44,17 @@ enforceSerializableSessionState = false
 # Sidebar width configuration (default is 21rem, increase for wider sidebar)
 sidebarWidth = 100
 
+# [theme]
+# # Clean theme for better perceived performance
+# primaryColor = "#1f77b4"
+# backgroundColor = "#ffffff"
+# secondaryBackgroundColor = "#f0f2f6"
+# textColor = "#262730"
+# font = "sans serif"
+
 [theme]
-# Clean theme for better perceived performance
-primaryColor = "#1f77b4"
-backgroundColor = "#ffffff"
-secondaryBackgroundColor = "#f0f2f6"
-textColor = "#262730"
-font = "sans serif"
\ No newline at end of file
+base="light"
+backgroundColor="#F3E8FF"
+primaryColor="#6C4AB6"  # Optional: accent color
+secondaryBackgroundColor="#F8F1FF"
+textColor="#1C1C1C"
\ No newline at end of file
diff --git a/modal/modal_app.py b/modal/modal_app.py
index 0dc0d19..b432c9f 100644
--- a/modal/modal_app.py
+++ b/modal/modal_app.py
@@ -58,7 +58,7 @@
     )
 
 @app.function(
-        # gpu="T4:1",
+        gpu="A10G:1",
         secrets=[modal.Secret.from_name("groq-secret")]
         )
 @modal.concurrent(max_inputs=100)

From fdf8be1750307bf9f68b49817d1f49eb11075a57 Mon Sep 17 00:00:00 2001
From: Bob Hosseini <bbkhosseini@gmail.com>
Date: Fri, 4 Jul 2025 01:51:27 +0200
Subject: [PATCH 4/4] readme update

---
 README.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/README.md b/README.md
index 774fa05..bd74b76 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,13 @@
 <img src="photo.png" alt="Screenshot of the PDF Question Answering" width="1000"/>
 *Figure: Screenshot of the PDF Question Answering System Dashboard.*
 
+## 🚀 Live Demo
+
+**Try the application live:** [https://bbkhosseini--two-stage-conrag-run.modal.run/](https://bbkhosseini--two-stage-conrag-run.modal.run/)
+
+*Experience the Two-Stage Consecutive RAG system in action! Upload your PDFs and ask questions directly in your browser.*
+
+
 ## Table of Contents
 - [Introduction](#introduction)
 - [System Overview](#system-overview)