From 35cabee41a41aad211a0f22d505d4e39361cce68 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 23 Mar 2026 17:18:25 +0000
Subject: [PATCH 1/9] Initial plan


From e248cd9a88d42e6511cc3ca12c0a350ce4fc5bdf Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 23 Mar 2026 17:27:24 +0000
Subject: [PATCH 2/9] Enhance /api/status endpoint with processing details,
 recent files, and statistics

Add new fields to the status API response:
- processing_details: breakdown of processing documents by status
- currently_processing: list of documents currently being processed
- recent_files: last 5 completed/failed files with timestamps
- total_pdfs: total document count
- failed_pdfs: failed document count
- avg_processing_seconds: average processing time

All existing fields are preserved for backward compatibility.
Add comprehensive unit tests for the enhanced status endpoint.

Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com>
Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/51a488f1-8693-4d02-894c-931f85fc9432
---
 tests/test_status_api.py      | 356 ++++++++++++++++++++++++++++++++++
 web_service/src/routes/api.py |  78 ++++++--
 2 files changed, 417 insertions(+), 17 deletions(-)
 create mode 100644 tests/test_status_api.py

diff --git a/tests/test_status_api.py b/tests/test_status_api.py
new file mode 100644
index 0000000..23bd786
--- /dev/null
+++ b/tests/test_status_api.py
@@ -0,0 +1,356 @@
+"""Tests for the enhanced /api/status endpoint."""
+
+import json
+import pytest
+import sys
+import os
+from unittest.mock import patch, MagicMock
+
+# Add paths for imports
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../scansynclib'))
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../web_service/src'))
+
+# Ensure the data directory exists for sqlite_wrapper module-level initialization
+os.makedirs(os.path.join(os.path.dirname(__file__), '../data'), exist_ok=True)
+
+# Mock Redis before any scansynclib imports, since settings.py connects at module level
+import redis as _real_redis
+_orig_from_url = _real_redis.Redis.from_url
+
+
+def _mock_from_url(*args, **kwargs):
+    mock_client = MagicMock()
+    mock_client.get.return_value = None  # No existing settings in Redis
+    mock_client.set.return_value = True
+    mock_client.publish.return_value = 0
+    mock_pubsub = MagicMock()
+    mock_pubsub.subscribe.return_value = None
+    mock_pubsub.listen.return_value = iter([])  # Empty iterator
+    mock_client.pubsub.return_value = mock_pubsub
+    return mock_client
+
+
+_real_redis.Redis.from_url = _mock_from_url
+
+
+@pytest.fixture
+def app():
+    """Create a Flask test app with the api blueprint."""
+    from flask import Flask
+    from routes.api import api_bp
+
+    app = Flask(__name__)
+    app.register_blueprint(api_bp)
+    app.config['TESTING'] = True
+    return app
+
+
+@pytest.fixture
+def client(app):
+    """Create a Flask test client."""
+    return app.test_client()
+
+
+class TestStatusAPI:
+    """Test cases for the enhanced /api/status endpoint."""
+
+    def test_status_returns_backward_compatible_fields(self, client):
+        """Test that all original response fields are still present."""
+        summary_result = {
+            'processed_pdfs': 10,
+            'processing_pdfs': 2,
+            'latest_processing_timestamp': '2024-06-01 12:00:00',
+            'latest_completed_timestamp': '2024-06-01 11:30:00',
+            'latest_created_name': 'invoice.pdf',
+            'latest_created_status': 2,
+            'total_pdfs': 15,
+            'failed_pdfs': 3,
+            'avg_processing_seconds': 45.678,
+        }
+
+        with patch('routes.api.execute_query') as mock_query:
+            mock_query.side_effect = [
+                summary_result,   # summary query
+                [],               # processing_details query
+                [],               # currently_processing query
+                [],               # recent_files query
+            ]
+            response = client.get('/api/status')
+            data = json.loads(response.data)
+
+        assert response.status_code == 200
+        assert data['processed_pdfs'] == 10
+        assert data['processing_pdfs'] == 2
+        assert data['latest_processing_timestamp'] == '2024-06-01 12:00:00'
+        assert data['latest_completed_timestamp'] == '2024-06-01 11:30:00'
+        assert data['latest_created_name'] == 'invoice.pdf'
+        assert data['latest_created_status'] == 2
+
+    def test_status_returns_new_fields(self, client):
+        """Test that all new response fields are present."""
+        summary_result = {
+            'processed_pdfs': 10,
+            'processing_pdfs': 2,
+            'latest_processing_timestamp': '2024-06-01 12:00:00',
+            'latest_completed_timestamp': '2024-06-01 11:30:00',
+            'latest_created_name': 'invoice.pdf',
+            'latest_created_status': 2,
+            'total_pdfs': 15,
+            'failed_pdfs': 3,
+            'avg_processing_seconds': 45.678,
+        }
+
+        processing_details = [
+            {'status': 'OCR Processing', 'status_code': 2, 'count': 1},
+            {'status': 'Syncing', 'status_code': 4, 'count': 1},
+        ]
+
+        currently_processing = [
+            {'id': 12, 'file_name': 'scan1.pdf', 'status': 'OCR Processing', 'status_code': 2, 'created': '2024-06-01 12:00:00', 'pdf_pages': 3},
+            {'id': 13, 'file_name': 'scan2.pdf', 'status': 'Syncing', 'status_code': 4, 'created': '2024-06-01 11:55:00', 'pdf_pages': 1},
+        ]
+
+        recent_files = [
+            {'id': 11, 'file_name': 'doc1.pdf', 'status': 'Completed', 'status_code': 5, 'created': '2024-06-01 10:00:00', 'completed': '2024-06-01 10:01:00', 'pdf_pages': 2},
+            {'id': 10, 'file_name': 'doc2.pdf', 'status': 'Failed', 'status_code': -1, 'created': '2024-06-01 09:00:00', 'completed': '2024-06-01 09:00:30', 'pdf_pages': 0},
+        ]
+
+        with patch('routes.api.execute_query') as mock_query:
+            mock_query.side_effect = [
+                summary_result,
+                processing_details,
+                currently_processing,
+                recent_files,
+            ]
+            response = client.get('/api/status')
+            data = json.loads(response.data)
+
+        assert response.status_code == 200
+        # New fields
+        assert data['total_pdfs'] == 15
+        assert data['failed_pdfs'] == 3
+        assert data['avg_processing_seconds'] == 45.68  # rounded to 2 decimal places
+        assert len(data['processing_details']) == 2
+        assert data['processing_details'][0]['status'] == 'OCR Processing'
+        assert data['processing_details'][0]['count'] == 1
+        assert len(data['currently_processing']) == 2
+        assert data['currently_processing'][0]['file_name'] == 'scan1.pdf'
+        assert len(data['recent_files']) == 2
+        assert data['recent_files'][0]['file_name'] == 'doc1.pdf'
+        assert data['recent_files'][1]['status'] == 'Failed'
+
+    def test_status_no_data_returns_404(self, client):
+        """Test that 404 is returned when no data exists."""
+        with patch('routes.api.execute_query') as mock_query:
+            mock_query.return_value = None
+            response = client.get('/api/status')
+
+        assert response.status_code == 404
+        data = json.loads(response.data)
+        assert 'error' in data
+
+    def test_status_empty_processing(self, client):
+        """Test response when no documents are currently processing."""
+        summary_result = {
+            'processed_pdfs': 5,
+            'processing_pdfs': 0,
+            'latest_processing_timestamp': None,
+            'latest_completed_timestamp': '2024-06-01 11:30:00',
+            'latest_created_name': 'doc.pdf',
+            'latest_created_status': 5,
+            'total_pdfs': 5,
+            'failed_pdfs': 0,
+            'avg_processing_seconds': 30.0,
+        }
+
+        with patch('routes.api.execute_query') as mock_query:
+            mock_query.side_effect = [
+                summary_result,
+                [],  # no processing details
+                [],  # no currently processing
+                [{'id': 1, 'file_name': 'a.pdf', 'status': 'Completed', 'status_code': 5, 'created': '2024-06-01 10:00:00', 'completed': '2024-06-01 10:00:30', 'pdf_pages': 1}],
+            ]
+            response = client.get('/api/status')
+            data = json.loads(response.data)
+
+        assert response.status_code == 200
+        assert data['processing_pdfs'] == 0
+        assert data['processing_details'] == []
+        assert data['currently_processing'] == []
+        assert len(data['recent_files']) == 1
+
+    def test_status_null_avg_processing(self, client):
+        """Test response when avg_processing_seconds is None (no completed docs)."""
+        summary_result = {
+            'processed_pdfs': 0,
+            'processing_pdfs': 1,
+            'latest_processing_timestamp': '2024-06-01 12:00:00',
+            'latest_completed_timestamp': None,
+            'latest_created_name': 'new.pdf',
+            'latest_created_status': 1,
+            'total_pdfs': 1,
+            'failed_pdfs': 0,
+            'avg_processing_seconds': None,
+        }
+
+        with patch('routes.api.execute_query') as mock_query:
+            mock_query.side_effect = [
+                summary_result,
+                [{'status': 'Reading Metadata', 'status_code': 1, 'count': 1}],
+                [{'id': 1, 'file_name': 'new.pdf', 'status': 'Reading Metadata', 'status_code': 1, 'created': '2024-06-01 12:00:00', 'pdf_pages': 0}],
+                [],
+            ]
+            response = client.get('/api/status')
+            data = json.loads(response.data)
+
+        assert response.status_code == 200
+        assert data['avg_processing_seconds'] is None
+        assert data['processed_pdfs'] == 0
+        assert len(data['currently_processing']) == 1
+
+    def test_status_database_error(self, client):
+        """Test that database errors return 500."""
+        with patch('routes.api.execute_query') as mock_query:
+            mock_query.side_effect = Exception("Database connection failed")
+            response = client.get('/api/status')
+
+        assert response.status_code == 500
+        data = json.loads(response.data)
+        assert 'error' in data
+
+    def test_status_recent_files_limit(self, client):
+        """Test that recent_files returns at most 5 entries."""
+        summary_result = {
+            'processed_pdfs': 10,
+            'processing_pdfs': 0,
+            'latest_processing_timestamp': None,
+            'latest_completed_timestamp': '2024-06-01 12:00:00',
+            'latest_created_name': 'doc10.pdf',
+            'latest_created_status': 5,
+            'total_pdfs': 10,
+            'failed_pdfs': 0,
+            'avg_processing_seconds': 25.0,
+        }
+
+        # Simulate query returning exactly 5 recent files
+        recent = [
+            {'id': i, 'file_name': f'doc{i}.pdf', 'status': 'Completed', 'status_code': 5,
+             'created': f'2024-06-01 {10+i}:00:00', 'completed': f'2024-06-01 {10+i}:01:00', 'pdf_pages': i}
+            for i in range(5)
+        ]
+
+        with patch('routes.api.execute_query') as mock_query:
+            mock_query.side_effect = [
+                summary_result,
+                [],
+                [],
+                recent,
+            ]
+            response = client.get('/api/status')
+            data = json.loads(response.data)
+
+        assert response.status_code == 200
+        assert len(data['recent_files']) == 5
+
+    def test_status_processing_details_structure(self, client):
+        """Test the structure of processing_details entries."""
+        summary_result = {
+            'processed_pdfs': 5,
+            'processing_pdfs': 3,
+            'latest_processing_timestamp': '2024-06-01 12:00:00',
+            'latest_completed_timestamp': '2024-06-01 11:00:00',
+            'latest_created_name': 'test.pdf',
+            'latest_created_status': 2,
+            'total_pdfs': 8,
+            'failed_pdfs': 0,
+            'avg_processing_seconds': 40.0,
+        }
+
+        processing_details = [
+            {'status': 'Reading Metadata', 'status_code': 1, 'count': 1},
+            {'status': 'OCR Processing', 'status_code': 2, 'count': 2},
+        ]
+
+        with patch('routes.api.execute_query') as mock_query:
+            mock_query.side_effect = [
+                summary_result,
+                processing_details,
+                [],
+                [],
+            ]
+            response = client.get('/api/status')
+            data = json.loads(response.data)
+
+        assert response.status_code == 200
+        for detail in data['processing_details']:
+            assert 'status' in detail
+            assert 'status_code' in detail
+            assert 'count' in detail
+
+    def test_status_includes_failed_in_recent(self, client):
+        """Test that failed documents appear in recent_files."""
+        summary_result = {
+            'processed_pdfs': 3,
+            'processing_pdfs': 0,
+            'latest_processing_timestamp': None,
+            'latest_completed_timestamp': '2024-06-01 12:00:00',
+            'latest_created_name': 'failed.pdf',
+            'latest_created_status': -1,
+            'total_pdfs': 5,
+            'failed_pdfs': 2,
+            'avg_processing_seconds': 30.0,
+        }
+
+        recent_files = [
+            {'id': 5, 'file_name': 'ok.pdf', 'status': 'Completed', 'status_code': 5,
+             'created': '2024-06-01 12:00:00', 'completed': '2024-06-01 12:01:00', 'pdf_pages': 2},
+            {'id': 4, 'file_name': 'failed.pdf', 'status': 'Failed', 'status_code': -1,
+             'created': '2024-06-01 11:00:00', 'completed': '2024-06-01 11:00:05', 'pdf_pages': 0},
+            {'id': 3, 'file_name': 'invalid.pdf', 'status': 'Invalid File', 'status_code': -1,
+             'created': '2024-06-01 10:00:00', 'completed': '2024-06-01 10:00:01', 'pdf_pages': 0},
+        ]
+
+        with patch('routes.api.execute_query') as mock_query:
+            mock_query.side_effect = [
+                summary_result,
+                [],
+                [],
+                recent_files,
+            ]
+            response = client.get('/api/status')
+            data = json.loads(response.data)
+
+        assert response.status_code == 200
+        assert data['failed_pdfs'] == 2
+        statuses = [f['status'] for f in data['recent_files']]
+        assert 'Failed' in statuses
+        assert 'Invalid File' in statuses
+
+    def test_status_execute_query_returns_none_for_lists(self, client):
+        """Test that None results from list queries are handled gracefully."""
+        summary_result = {
+            'processed_pdfs': 1,
+            'processing_pdfs': 0,
+            'latest_processing_timestamp': None,
+            'latest_completed_timestamp': '2024-06-01 12:00:00',
+            'latest_created_name': 'doc.pdf',
+            'latest_created_status': 5,
+            'total_pdfs': 1,
+            'failed_pdfs': 0,
+            'avg_processing_seconds': 10.0,
+        }
+
+        with patch('routes.api.execute_query') as mock_query:
+            mock_query.side_effect = [
+                summary_result,
+                None,  # processing_details returns None
+                None,  # currently_processing returns None
+                None,  # recent_files returns None
+            ]
+            response = client.get('/api/status')
+            data = json.loads(response.data)
+
+        assert response.status_code == 200
+        assert data['processing_details'] == []
+        assert data['currently_processing'] == []
+        assert data['recent_files'] == []
diff --git a/web_service/src/routes/api.py b/web_service/src/routes/api.py
index e399c53..742a8dc 100644
--- a/web_service/src/routes/api.py
+++ b/web_service/src/routes/api.py
@@ -70,30 +70,74 @@ def delete_openai_settings():
 def get_status():
     # logger.info("Received request to get status")
     try:
-        query = """
-            SELECT *,
+        # Core summary query (backward compatible)
+        summary_query = """
+            SELECT
                 (SELECT COUNT(*) FROM scanneddata WHERE status_code = 5) AS processed_pdfs,
                 (SELECT COUNT(*) FROM scanneddata WHERE status_code BETWEEN 0 AND 4) AS processing_pdfs,
                 (SELECT DATETIME(created) FROM scanneddata WHERE status_code < 5 ORDER BY created DESC LIMIT 1) AS latest_processing_timestamp,
                 (SELECT DATETIME(modified) FROM scanneddata WHERE status_code = 5 ORDER BY modified DESC LIMIT 1) AS latest_completed_timestamp,
                 (SELECT file_name FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_name,
-                (SELECT status_code FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_status
-            FROM scanneddata
-            ORDER BY created DESC, id DESC
+                (SELECT status_code FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_status,
+                (SELECT COUNT(*) FROM scanneddata) AS total_pdfs,
+                (SELECT COUNT(*) FROM scanneddata WHERE status_code < 0) AS failed_pdfs,
+                (SELECT AVG((JULIANDAY(modified) - JULIANDAY(created)) * 86400) FROM scanneddata WHERE status_code = 5) AS avg_processing_seconds
         """
-        result = execute_query(query, fetchone=True)
-        if result:
-            response = {
-                'processed_pdfs': result.get('processed_pdfs', 0),
-                'processing_pdfs': result.get('processing_pdfs', 0),
-                'latest_processing_timestamp': result.get('latest_processing_timestamp', None),
-                'latest_completed_timestamp': result.get('latest_completed_timestamp', None),
-                'latest_created_name': result.get('latest_created_name', None),
-                'latest_created_status': result.get('latest_created_status', None)
-            }
-            return jsonify(response), 200
-        else:
+        result = execute_query(summary_query, fetchone=True)
+        if not result:
             return jsonify({'error': 'No data found'}), 404
+
+        # Breakdown of currently processing documents by status
+        processing_details_query = """
+            SELECT file_status AS status, status_code, COUNT(*) AS count
+            FROM scanneddata
+            WHERE status_code BETWEEN 0 AND 4
+            GROUP BY file_status, status_code
+            ORDER BY status_code ASC
+        """
+        processing_details = execute_query(processing_details_query, fetchall=True) or []
+
+        # Currently processing documents (individual items)
+        currently_processing_query = """
+            SELECT id, file_name, file_status AS status, status_code,
+                   DATETIME(created) AS created, pdf_pages
+            FROM scanneddata
+            WHERE status_code BETWEEN 0 AND 4
+            ORDER BY created DESC
+        """
+        currently_processing = execute_query(currently_processing_query, fetchall=True) or []
+
+        # Last 5 recently finished files (completed or failed)
+        recent_files_query = """
+            SELECT id, file_name, file_status AS status, status_code,
+                   DATETIME(created) AS created, DATETIME(modified) AS completed,
+                   pdf_pages
+            FROM scanneddata
+            WHERE status_code = 5 OR status_code < 0
+            ORDER BY modified DESC
+            LIMIT 5
+        """
+        recent_files = execute_query(recent_files_query, fetchall=True) or []
+
+        avg_seconds = result.get('avg_processing_seconds', None)
+
+        response = {
+            # Existing fields (backward compatible)
+            'processed_pdfs': result.get('processed_pdfs', 0),
+            'processing_pdfs': result.get('processing_pdfs', 0),
+            'latest_processing_timestamp': result.get('latest_processing_timestamp', None),
+            'latest_completed_timestamp': result.get('latest_completed_timestamp', None),
+            'latest_created_name': result.get('latest_created_name', None),
+            'latest_created_status': result.get('latest_created_status', None),
+            # New fields
+            'total_pdfs': result.get('total_pdfs', 0),
+            'failed_pdfs': result.get('failed_pdfs', 0),
+            'avg_processing_seconds': round(avg_seconds, 2) if avg_seconds is not None else None,
+            'processing_details': processing_details,
+            'currently_processing': currently_processing,
+            'recent_files': recent_files,
+        }
+        return jsonify(response), 200
     except Exception as e:
         err = f"Error fetching status: {e}"
         logger.exception(err)

From c121056355283ab95e0a9a6bd742c1b8c2f4e8b2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 23 Mar 2026 18:03:30 +0000
Subject: [PATCH 3/9] Add /api/status documentation to README

Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com>
Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/bd224ccb-e72f-459a-81b6-091b19b464d3
---
 README.md | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/README.md b/README.md
index d7735da..4f59ffe 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,73 @@ For development purposes, you can use the built-in Flask server:
 3. Run pytests via the [run-tests.sh](run-tests.sh) script (Spins up a docker [test-service](/test_service/Dockerfile))
 
 
+## 📡 API
+
+### `GET /api/status`
+
+Returns aggregated document processing status including per-stage breakdowns, currently processing items, and recent completion history.
+
+**Response fields:**
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `processed_pdfs` | `int` | Count of completed documents |
+| `processing_pdfs` | `int` | Count of in-progress documents |
+| `latest_processing_timestamp` | `string\|null` | Creation timestamp of the most recently created in-progress document |
+| `latest_completed_timestamp` | `string\|null` | Most recent completion timestamp |
+| `latest_created_name` | `string\|null` | Filename of the latest document |
+| `latest_created_status` | `int\|null` | Status code of the latest document |
+| `total_pdfs` | `int` | Total document count across all statuses |
+| `failed_pdfs` | `int` | Count of failed documents |
+| `avg_processing_seconds` | `float\|null` | Average processing time for completed documents, in seconds (rounded to 2 decimal places) |
+| `processing_details` | `array` | Per-status counts of in-progress documents, sorted by ascending status code |
+| `currently_processing` | `array` | List of individual documents currently being processed |
+| `recent_files` | `array` | Last 5 completed or failed documents with timestamps |
+
+<details>
+<summary>Example response</summary>
+
+```json
+{
+  "processed_pdfs": 10,
+  "processing_pdfs": 3,
+  "latest_processing_timestamp": "2024-06-01 12:00:00",
+  "latest_completed_timestamp": "2024-06-01 11:30:00",
+  "latest_created_name": "invoice.pdf",
+  "latest_created_status": 2,
+  "total_pdfs": 15,
+  "failed_pdfs": 2,
+  "avg_processing_seconds": 45.68,
+  "processing_details": [
+    {"status": "Reading Metadata", "status_code": 1, "count": 1},
+    {"status": "OCR Processing", "status_code": 2, "count": 2}
+  ],
+  "currently_processing": [
+    {
+      "id": 12,
+      "file_name": "scan1.pdf",
+      "status": "OCR Processing",
+      "status_code": 2,
+      "created": "2024-06-01 12:00:00",
+      "pdf_pages": 3
+    }
+  ],
+  "recent_files": [
+    {
+      "id": 11,
+      "file_name": "doc1.pdf",
+      "status": "Completed",
+      "status_code": 5,
+      "created": "2024-06-01 10:00:00",
+      "completed": "2024-06-01 10:01:00",
+      "pdf_pages": 2
+    }
+  ]
+}
+```
+
+</details>
+
 ## 🔮 Upcoming Features
 - **Notifications**: Stay informed with real-time updates.
 - **OCR Settings**: Take control of OCR settings in the web interface

From 5253604b3dd96371c4c242554c92a0cbfb870151 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Mar 2026 19:11:35 +0000
Subject: [PATCH 4/9] Reduce SQL queries from 4 to 3 by deriving
 processing_details from currently_processing in Python

Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com>
Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/4abb078b-a869-4988-be63-1ebc9ce473d5
---
 tests/test_status_api.py      | 22 +++++-----------------
 web_service/src/routes/api.py | 23 ++++++++++++-----------
 2 files changed, 17 insertions(+), 28 deletions(-)

diff --git a/tests/test_status_api.py b/tests/test_status_api.py
index 23bd786..d91c356 100644
--- a/tests/test_status_api.py
+++ b/tests/test_status_api.py
@@ -71,7 +71,6 @@ def test_status_returns_backward_compatible_fields(self, client):
         with patch('routes.api.execute_query') as mock_query:
             mock_query.side_effect = [
                 summary_result,   # summary query
-                [],               # processing_details query
                 [],               # currently_processing query
                 [],               # recent_files query
             ]
@@ -100,11 +99,6 @@ def test_status_returns_new_fields(self, client):
             'avg_processing_seconds': 45.678,
         }
 
-        processing_details = [
-            {'status': 'OCR Processing', 'status_code': 2, 'count': 1},
-            {'status': 'Syncing', 'status_code': 4, 'count': 1},
-        ]
-
         currently_processing = [
             {'id': 12, 'file_name': 'scan1.pdf', 'status': 'OCR Processing', 'status_code': 2, 'created': '2024-06-01 12:00:00', 'pdf_pages': 3},
             {'id': 13, 'file_name': 'scan2.pdf', 'status': 'Syncing', 'status_code': 4, 'created': '2024-06-01 11:55:00', 'pdf_pages': 1},
@@ -118,7 +112,6 @@ def test_status_returns_new_fields(self, client):
         with patch('routes.api.execute_query') as mock_query:
             mock_query.side_effect = [
                 summary_result,
-                processing_details,
                 currently_processing,
                 recent_files,
             ]
@@ -166,7 +159,6 @@ def test_status_empty_processing(self, client):
         with patch('routes.api.execute_query') as mock_query:
             mock_query.side_effect = [
                 summary_result,
-                [],  # no processing details
                 [],  # no currently processing
                 [{'id': 1, 'file_name': 'a.pdf', 'status': 'Completed', 'status_code': 5, 'created': '2024-06-01 10:00:00', 'completed': '2024-06-01 10:00:30', 'pdf_pages': 1}],
             ]
@@ -196,7 +188,6 @@ def test_status_null_avg_processing(self, client):
         with patch('routes.api.execute_query') as mock_query:
             mock_query.side_effect = [
                 summary_result,
-                [{'status': 'Reading Metadata', 'status_code': 1, 'count': 1}],
                 [{'id': 1, 'file_name': 'new.pdf', 'status': 'Reading Metadata', 'status_code': 1, 'created': '2024-06-01 12:00:00', 'pdf_pages': 0}],
                 [],
             ]
@@ -243,7 +234,6 @@ def test_status_recent_files_limit(self, client):
             mock_query.side_effect = [
                 summary_result,
                 [],
-                [],
                 recent,
             ]
             response = client.get('/api/status')
@@ -266,16 +256,16 @@ def test_status_processing_details_structure(self, client):
             'avg_processing_seconds': 40.0,
         }
 
-        processing_details = [
-            {'status': 'Reading Metadata', 'status_code': 1, 'count': 1},
-            {'status': 'OCR Processing', 'status_code': 2, 'count': 2},
+        currently_processing = [
+            {'id': 1, 'file_name': 'a.pdf', 'status': 'Reading Metadata', 'status_code': 1, 'created': '2024-06-01 12:00:00', 'pdf_pages': 1},
+            {'id': 2, 'file_name': 'b.pdf', 'status': 'OCR Processing', 'status_code': 2, 'created': '2024-06-01 11:59:00', 'pdf_pages': 2},
+            {'id': 3, 'file_name': 'c.pdf', 'status': 'OCR Processing', 'status_code': 2, 'created': '2024-06-01 11:58:00', 'pdf_pages': 3},
         ]
 
         with patch('routes.api.execute_query') as mock_query:
             mock_query.side_effect = [
                 summary_result,
-                processing_details,
-                [],
+                currently_processing,
                 [],
             ]
             response = client.get('/api/status')
@@ -314,7 +304,6 @@ def test_status_includes_failed_in_recent(self, client):
             mock_query.side_effect = [
                 summary_result,
                 [],
-                [],
                 recent_files,
             ]
             response = client.get('/api/status')
@@ -343,7 +332,6 @@ def test_status_execute_query_returns_none_for_lists(self, client):
         with patch('routes.api.execute_query') as mock_query:
             mock_query.side_effect = [
                 summary_result,
-                None,  # processing_details returns None
                 None,  # currently_processing returns None
                 None,  # recent_files returns None
             ]
diff --git a/web_service/src/routes/api.py b/web_service/src/routes/api.py
index 742a8dc..b7d3598 100644
--- a/web_service/src/routes/api.py
+++ b/web_service/src/routes/api.py
@@ -87,17 +87,8 @@ def get_status():
         if not result:
             return jsonify({'error': 'No data found'}), 404
 
-        # Breakdown of currently processing documents by status
-        processing_details_query = """
-            SELECT file_status AS status, status_code, COUNT(*) AS count
-            FROM scanneddata
-            WHERE status_code BETWEEN 0 AND 4
-            GROUP BY file_status, status_code
-            ORDER BY status_code ASC
-        """
-        processing_details = execute_query(processing_details_query, fetchall=True) or []
-
-        # Currently processing documents (individual items)
+        # Currently processing documents (individual items) — also used to
+        # derive processing_details breakdown, avoiding a separate GROUP BY query.
         currently_processing_query = """
             SELECT id, file_name, file_status AS status, status_code,
                    DATETIME(created) AS created, pdf_pages
@@ -107,6 +98,16 @@ def get_status():
         """
         currently_processing = execute_query(currently_processing_query, fetchall=True) or []
 
+        # Derive processing_details from currently_processing in Python
+        details_map: dict[tuple[str, int], int] = {}
+        for item in currently_processing:
+            key = (item['status'], item['status_code'])
+            details_map[key] = details_map.get(key, 0) + 1
+        processing_details = sorted(
+            [{'status': s, 'status_code': sc, 'count': c} for (s, sc), c in details_map.items()],
+            key=lambda d: d['status_code'],
+        )
+
         # Last 5 recently finished files (completed or failed)
         recent_files_query = """
             SELECT id, file_name, file_status AS status, status_code,

From f39555a7068458ef844ba39f36fdd187200a883a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Mar 2026 19:22:55 +0000
Subject: [PATCH 5/9] Fix latest_processing_timestamp to exclude deleted/failed
 docs (status_code < 0)

Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com>
Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/992614c5-ed42-428c-96b2-256ccf0585c4
---
 web_service/src/routes/api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web_service/src/routes/api.py b/web_service/src/routes/api.py
index b7d3598..706b8bb 100644
--- a/web_service/src/routes/api.py
+++ b/web_service/src/routes/api.py
@@ -75,7 +75,7 @@ def get_status():
             SELECT
                 (SELECT COUNT(*) FROM scanneddata WHERE status_code = 5) AS processed_pdfs,
                 (SELECT COUNT(*) FROM scanneddata WHERE status_code BETWEEN 0 AND 4) AS processing_pdfs,
-                (SELECT DATETIME(created) FROM scanneddata WHERE status_code < 5 ORDER BY created DESC LIMIT 1) AS latest_processing_timestamp,
+                (SELECT DATETIME(created) FROM scanneddata WHERE status_code BETWEEN 0 AND 4 ORDER BY created DESC LIMIT 1) AS latest_processing_timestamp,
                 (SELECT DATETIME(modified) FROM scanneddata WHERE status_code = 5 ORDER BY modified DESC LIMIT 1) AS latest_completed_timestamp,
                 (SELECT file_name FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_name,
                 (SELECT status_code FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_status,

From 11f442b854d296dbd23b54168338579e010cb603 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Mar 2026 19:39:36 +0000
Subject: [PATCH 6/9] Fix CI: add apt-get upgrade to resolve dpkg package
 conflict in test_service Dockerfile

Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com>
Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/ad59c16f-0584-4752-afa2-c81172389beb
---
 test_service/Dockerfile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/test_service/Dockerfile b/test_service/Dockerfile
index decf732..99db41c 100644
--- a/test_service/Dockerfile
+++ b/test_service/Dockerfile
@@ -4,8 +4,9 @@ FROM seleniarm/standalone-chromium:latest
 # Switch to the root user to install dependencies
 USER root
 
-# Update package lists and install Python3, pip, and venv
-RUN apt-get update && apt-get install -y python3 python3-pip python3-venv
+# Update package lists, upgrade base packages to resolve potential version
+# conflicts with newer repository packages, then install Python3 toolchain.
+RUN apt-get update && apt-get upgrade -y && apt-get install -y python3 python3-pip python3-venv
 
 # Create a Python virtual environment
 RUN python3 -m venv /opt/venv

From 35343b4d049bc4c4258d78e235f5248952151839 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Mar 2026 19:55:25 +0000
Subject: [PATCH 7/9] Fix CI: use --force-overwrite for dpkg to handle openssl
 package split conflict

Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com>
Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/5e7d9707-f37f-4978-a7ad-066038fcc966
---
 test_service/Dockerfile | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/test_service/Dockerfile b/test_service/Dockerfile
index 99db41c..eed99f5 100644
--- a/test_service/Dockerfile
+++ b/test_service/Dockerfile
@@ -4,9 +4,10 @@ FROM seleniarm/standalone-chromium:latest
 # Switch to the root user to install dependencies
 USER root
 
-# Update package lists, upgrade base packages to resolve potential version
-# conflicts with newer repository packages, then install Python3 toolchain.
-RUN apt-get update && apt-get upgrade -y && apt-get install -y python3 python3-pip python3-venv
+# Update package lists and install Python3, pip, and venv.
+# --force-overwrite handles file ownership changes during package splits
+# (e.g. openssl-provider-legacy taking legacy.so from libssl3t64).
+RUN apt-get update && apt-get install -y -o Dpkg::Options::="--force-overwrite" python3 python3-pip python3-venv
 
 # Create a Python virtual environment
 RUN python3 -m venv /opt/venv

From 757ab68ae9d51e92e7ad1a882fd25d281e03622e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Mar 2026 20:15:48 +0000
Subject: [PATCH 8/9] Fix web service crash when RabbitMQ unavailable: handle
 None return from connect_rabbitmq()

Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com>
Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/a640e2b5-ce42-4e6e-85f4-5790011e0046
---
 scansynclib/scansynclib.egg-info/PKG-INFO      | 12 ++++++++++++
 scansynclib/scansynclib.egg-info/SOURCES.txt   | 18 ++++++++++++++++++
 .../scansynclib.egg-info/dependency_links.txt  |  1 +
 scansynclib/scansynclib.egg-info/requires.txt  |  7 +++++++
 scansynclib/scansynclib.egg-info/top_level.txt |  1 +
 web_service/src/main.py                        |  6 +++++-
 6 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 scansynclib/scansynclib.egg-info/PKG-INFO
 create mode 100644 scansynclib/scansynclib.egg-info/SOURCES.txt
 create mode 100644 scansynclib/scansynclib.egg-info/dependency_links.txt
 create mode 100644 scansynclib/scansynclib.egg-info/requires.txt
 create mode 100644 scansynclib/scansynclib.egg-info/top_level.txt

diff --git a/scansynclib/scansynclib.egg-info/PKG-INFO b/scansynclib/scansynclib.egg-info/PKG-INFO
new file mode 100644
index 0000000..28249e6
--- /dev/null
+++ b/scansynclib/scansynclib.egg-info/PKG-INFO
@@ -0,0 +1,12 @@
+Metadata-Version: 2.4
+Name: scansynclib
+Version: 0.1.0
+Summary: Shared helper library for ScanSync
+Requires-Python: >=3.13
+Requires-Dist: colorlog
+Requires-Dist: tenacity
+Requires-Dist: pika
+Requires-Dist: openai
+Requires-Dist: msal
+Requires-Dist: pypdf
+Requires-Dist: redis
diff --git a/scansynclib/scansynclib.egg-info/SOURCES.txt b/scansynclib/scansynclib.egg-info/SOURCES.txt
new file mode 100644
index 0000000..87bba7e
--- /dev/null
+++ b/scansynclib/scansynclib.egg-info/SOURCES.txt
@@ -0,0 +1,18 @@
+pyproject.toml
+./scansynclib/ProcessItem.py
+./scansynclib/__init__.py
+./scansynclib/config.py
+./scansynclib/helpers.py
+./scansynclib/logging.py
+./scansynclib/ollama_helper.py
+./scansynclib/onedrive_api.py
+./scansynclib/onedrive_smb_manager.py
+./scansynclib/openai_helper.py
+./scansynclib/settings.py
+./scansynclib/settings_schema.py
+./scansynclib/sqlite_wrapper.py
+scansynclib.egg-info/PKG-INFO
+scansynclib.egg-info/SOURCES.txt
+scansynclib.egg-info/dependency_links.txt
+scansynclib.egg-info/requires.txt
+scansynclib.egg-info/top_level.txt
\ No newline at end of file
diff --git a/scansynclib/scansynclib.egg-info/dependency_links.txt b/scansynclib/scansynclib.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/scansynclib/scansynclib.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/scansynclib/scansynclib.egg-info/requires.txt b/scansynclib/scansynclib.egg-info/requires.txt
new file mode 100644
index 0000000..8d18428
--- /dev/null
+++ b/scansynclib/scansynclib.egg-info/requires.txt
@@ -0,0 +1,7 @@
+colorlog
+tenacity
+pika
+openai
+msal
+pypdf
+redis
diff --git a/scansynclib/scansynclib.egg-info/top_level.txt b/scansynclib/scansynclib.egg-info/top_level.txt
new file mode 100644
index 0000000..d38122b
--- /dev/null
+++ b/scansynclib/scansynclib.egg-info/top_level.txt
@@ -0,0 +1 @@
+scansynclib
diff --git a/web_service/src/main.py b/web_service/src/main.py
index a274290..b584110 100644
--- a/web_service/src/main.py
+++ b/web_service/src/main.py
@@ -41,7 +41,11 @@ def start_rabbitmq_listener():
 def rabbitmq_listener():
     logger.info("Started RabbitMQ listener thread.")
 
-    connection, channel = connect_rabbitmq()
+    result = connect_rabbitmq()
+    if result is None:
+        logger.warning("RabbitMQ is not available. SSE updates will be disabled.")
+        return
+    connection, channel = result
 
     # Use fanout as exchange type to broadcast messages to all connected clients
     exchange_name = "sse_updates_fanout"

From 0c096b2534ff73998aea6e03b61a2e6c5332f059 Mon Sep 17 00:00:00 2001
From: Maximilian Krause <maxi.krause@me.com>
Date: Tue, 24 Mar 2026 22:36:41 +0100
Subject: [PATCH 9/9] Fix pytests: rename web_service to web-service and
 relax Ollama connection test

---
 docker-compose.test.yml             |  4 ++--
 docker-compose.yml                  |  3 +--
 scansynclib/scansynclib/config.json |  2 +-
 tests/test_homepage.py              | 24 ++++++++++++++----------
 web_service/src/routes/settings.py  |  4 ++--
 5 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/docker-compose.test.yml b/docker-compose.test.yml
index 866e9e6..c5d1b50 100644
--- a/docker-compose.test.yml
+++ b/docker-compose.test.yml
@@ -1,5 +1,5 @@
 services:
-  web_service:
+  web-service:
     build:
       context: .
       dockerfile: web_service/Dockerfile
@@ -29,7 +29,7 @@ services:
       context: .
       dockerfile: test_service/Dockerfile
     depends_on:
-      - web_service
+      - web-service
       - redis
     networks:
       - test-network
diff --git a/docker-compose.yml b/docker-compose.yml
index 7e94ca0..d9f031b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -103,8 +103,7 @@ services:
       - rabbitmq
     command: ["python", "main.py"]
   
-  web_service:
-    image: web_service
+  web-service:
     restart: unless-stopped
     build:
       context: .
diff --git a/scansynclib/scansynclib/config.json b/scansynclib/scansynclib/config.json
index 261cef2..ce89d77 100644
--- a/scansynclib/scansynclib/config.json
+++ b/scansynclib/scansynclib/config.json
@@ -1,5 +1,5 @@
 {
-    "version": "0.4.0",
+    "version": "0.4.1",
     "failedDir": "failed-documents",
     "db": {
         "path": "data/scansync.db"
diff --git a/tests/test_homepage.py b/tests/test_homepage.py
index 71d0e85..8e2db24 100644
--- a/tests/test_homepage.py
+++ b/tests/test_homepage.py
@@ -23,7 +23,7 @@ def driver():
 
 
 def test_dashboard_text_first_start(driver):
-    driver.get("http://web_service:5001")
+    driver.get("http://web-service:5001")
     WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
     assert "ScanSync" in driver.title
     assert "Get started in three steps:" in driver.page_source
@@ -41,7 +41,7 @@ def test_dashboard_text_first_start(driver):
 
 
 def test_dashboard_sync_first_start(driver):
-    driver.get("http://web_service:5001/sync")
+    driver.get("http://web-service:5001/sync")
     WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
     assert "ScanSync" in driver.title
     assert "Set up or manage your OneDrive connections for syncing." in driver.page_source
@@ -53,7 +53,7 @@ def test_dashboard_sync_first_start(driver):
 
 
 def test_dashboard_settings_first_start_onedrive(driver):
-    driver.get("http://web_service:5001/settings?tab=onedrive-tab")
+    driver.get("http://web-service:5001/settings?tab=onedrive-tab")
     WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
     assert "ScanSync" in driver.title
     assert "Settings" in driver.find_element(By.TAG_NAME, "h1").text
@@ -63,7 +63,7 @@ def test_dashboard_settings_first_start_onedrive(driver):
 
 
 def test_dashboard_settings_tabs(driver):
-    driver.get("http://web_service:5001/settings?tab=ocr-tab")
+    driver.get("http://web-service:5001/settings?tab=ocr-tab")
     WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
 
     assert "OCR settings will be available in the future." in driver.page_source
@@ -77,7 +77,7 @@ def test_dashboard_settings_tabs(driver):
 
 
 def test_dashboard_settings_file_naming_first_start(driver):
-    driver.get("http://web_service:5001/settings?tab=file-naming-tab")
+    driver.get("http://web-service:5001/settings?tab=file-naming-tab")
     WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
     assert "ScanSync" in driver.title
     assert "Choose your automatic file naming method:" in driver.page_source
@@ -93,7 +93,7 @@ def test_dashboard_settings_file_naming_first_start(driver):
 
 
 def test_dashboard_settings_ollama_first_start(driver):
-    driver.get("http://web_service:5001/settings?tab=file-naming-tab")
+    driver.get("http://web-service:5001/settings?tab=file-naming-tab")
     WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
     assert "ScanSync" in driver.title
 
@@ -115,8 +115,12 @@ def test_dashboard_settings_ollama_first_start(driver):
     assert driver.find_element(By.ID, "ollama_server_port").get_attribute("value") == "11434"
 
     driver.find_element(By.ID, "ollama-connect-btn").click()
-    WebDriverWait(driver, 10).until(
-        EC.visibility_of_element_located((By.ID, "ollama-error"))
+    # Wait for either the error div or the models section to become visible,
+    # depending on whether Ollama is reachable in the test environment.
+    WebDriverWait(driver, 15).until(
+        lambda d: d.find_element(By.ID, "ollama-error").is_displayed()
+        or d.find_element(By.ID, "ollama-models-section").is_displayed()
     )
-    ollama_error = driver.find_element(By.ID, "ollama-error").text
-    assert "Could not connect to Ollama server." in ollama_error
+    error_div = driver.find_element(By.ID, "ollama-error")
+    models_section = driver.find_element(By.ID, "ollama-models-section")
+    assert error_div.is_displayed() or models_section.is_displayed()
diff --git a/web_service/src/routes/settings.py b/web_service/src/routes/settings.py
index 1227fbf..aa150f9 100644
--- a/web_service/src/routes/settings.py
+++ b/web_service/src/routes/settings.py
@@ -154,7 +154,7 @@ def get_ollama_version():
     logger.debug(f"Connecting to Ollama server at {scheme}://{url}:{port}/api/version")
     try:
         full_url = f"{scheme}://{url}:{port}/api/version"
-        response = requests.get(full_url, timeout=10)
+        response = requests.get(full_url, timeout=(2, 3))
         if response.status_code == 200:
             logger.debug(f"Ollama server version response: {response.json()}")
             return Response(json.dumps(response.json()), status=200, mimetype='application/json')
@@ -199,7 +199,7 @@ def get_ollama_models():
     logger.debug(f"Connecting to Ollama server at {scheme}://{url}:{port}/api/tags")
     try:
         full_url = f"{scheme}://{url}:{port}/api/tags"
-        response = requests.get(full_url, timeout=10)
+        response = requests.get(full_url, timeout=(2, 3))
         logger.debug(f"Ollama server models response: {response.status_code} - {response.text}")
         if response.status_code == 200:
             return Response(json.dumps(response.json()), status=200, mimetype='application/json')