From 35cabee41a41aad211a0f22d505d4e39361cce68 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 23 Mar 2026 17:18:25 +0000
Subject: [PATCH 1/9] Initial plan
From e248cd9a88d42e6511cc3ca12c0a350ce4fc5bdf Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 23 Mar 2026 17:27:24 +0000
Subject: [PATCH 2/9] Enhance /api/status endpoint with processing details,
recent files, and statistics
Add new fields to the status API response:
- processing_details: breakdown of processing documents by status
- currently_processing: list of documents currently being processed
- recent_files: last 5 completed/failed files with timestamps
- total_pdfs: total document count
- failed_pdfs: failed document count
- avg_processing_seconds: average processing time
All existing fields are preserved for backward compatibility.
Add comprehensive unit tests for the enhanced status endpoint.
Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com>
Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/51a488f1-8693-4d02-894c-931f85fc9432
---
tests/test_status_api.py | 356 ++++++++++++++++++++++++++++++++++
web_service/src/routes/api.py | 78 ++++++--
2 files changed, 417 insertions(+), 17 deletions(-)
create mode 100644 tests/test_status_api.py
diff --git a/tests/test_status_api.py b/tests/test_status_api.py
new file mode 100644
index 0000000..23bd786
--- /dev/null
+++ b/tests/test_status_api.py
@@ -0,0 +1,356 @@
+"""Tests for the enhanced /api/status endpoint."""
+
+import json
+import pytest
+import sys
+import os
+from unittest.mock import patch, MagicMock
+
+# Add paths for imports
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../scansynclib'))
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../web_service/src'))
+
+# Ensure the data directory exists for sqlite_wrapper module-level initialization
+os.makedirs(os.path.join(os.path.dirname(__file__), '../data'), exist_ok=True)
+
+# Mock Redis before any scansynclib imports, since settings.py connects at module level
+import redis as _real_redis
+_orig_from_url = _real_redis.Redis.from_url
+
+
+def _mock_from_url(*args, **kwargs):
+ mock_client = MagicMock()
+ mock_client.get.return_value = None # No existing settings in Redis
+ mock_client.set.return_value = True
+ mock_client.publish.return_value = 0
+ mock_pubsub = MagicMock()
+ mock_pubsub.subscribe.return_value = None
+ mock_pubsub.listen.return_value = iter([]) # Empty iterator
+ mock_client.pubsub.return_value = mock_pubsub
+ return mock_client
+
+
+_real_redis.Redis.from_url = _mock_from_url
+
+
+@pytest.fixture
+def app():
+ """Create a Flask test app with the api blueprint."""
+ from flask import Flask
+ from routes.api import api_bp
+
+ app = Flask(__name__)
+ app.register_blueprint(api_bp)
+ app.config['TESTING'] = True
+ return app
+
+
+@pytest.fixture
+def client(app):
+ """Create a Flask test client."""
+ return app.test_client()
+
+
+class TestStatusAPI:
+ """Test cases for the enhanced /api/status endpoint."""
+
+ def test_status_returns_backward_compatible_fields(self, client):
+ """Test that all original response fields are still present."""
+ summary_result = {
+ 'processed_pdfs': 10,
+ 'processing_pdfs': 2,
+ 'latest_processing_timestamp': '2024-06-01 12:00:00',
+ 'latest_completed_timestamp': '2024-06-01 11:30:00',
+ 'latest_created_name': 'invoice.pdf',
+ 'latest_created_status': 2,
+ 'total_pdfs': 15,
+ 'failed_pdfs': 3,
+ 'avg_processing_seconds': 45.678,
+ }
+
+ with patch('routes.api.execute_query') as mock_query:
+ mock_query.side_effect = [
+ summary_result, # summary query
+ [], # processing_details query
+ [], # currently_processing query
+ [], # recent_files query
+ ]
+ response = client.get('/api/status')
+ data = json.loads(response.data)
+
+ assert response.status_code == 200
+ assert data['processed_pdfs'] == 10
+ assert data['processing_pdfs'] == 2
+ assert data['latest_processing_timestamp'] == '2024-06-01 12:00:00'
+ assert data['latest_completed_timestamp'] == '2024-06-01 11:30:00'
+ assert data['latest_created_name'] == 'invoice.pdf'
+ assert data['latest_created_status'] == 2
+
+ def test_status_returns_new_fields(self, client):
+ """Test that all new response fields are present."""
+ summary_result = {
+ 'processed_pdfs': 10,
+ 'processing_pdfs': 2,
+ 'latest_processing_timestamp': '2024-06-01 12:00:00',
+ 'latest_completed_timestamp': '2024-06-01 11:30:00',
+ 'latest_created_name': 'invoice.pdf',
+ 'latest_created_status': 2,
+ 'total_pdfs': 15,
+ 'failed_pdfs': 3,
+ 'avg_processing_seconds': 45.678,
+ }
+
+ processing_details = [
+ {'status': 'OCR Processing', 'status_code': 2, 'count': 1},
+ {'status': 'Syncing', 'status_code': 4, 'count': 1},
+ ]
+
+ currently_processing = [
+ {'id': 12, 'file_name': 'scan1.pdf', 'status': 'OCR Processing', 'status_code': 2, 'created': '2024-06-01 12:00:00', 'pdf_pages': 3},
+ {'id': 13, 'file_name': 'scan2.pdf', 'status': 'Syncing', 'status_code': 4, 'created': '2024-06-01 11:55:00', 'pdf_pages': 1},
+ ]
+
+ recent_files = [
+ {'id': 11, 'file_name': 'doc1.pdf', 'status': 'Completed', 'status_code': 5, 'created': '2024-06-01 10:00:00', 'completed': '2024-06-01 10:01:00', 'pdf_pages': 2},
+ {'id': 10, 'file_name': 'doc2.pdf', 'status': 'Failed', 'status_code': -1, 'created': '2024-06-01 09:00:00', 'completed': '2024-06-01 09:00:30', 'pdf_pages': 0},
+ ]
+
+ with patch('routes.api.execute_query') as mock_query:
+ mock_query.side_effect = [
+ summary_result,
+ processing_details,
+ currently_processing,
+ recent_files,
+ ]
+ response = client.get('/api/status')
+ data = json.loads(response.data)
+
+ assert response.status_code == 200
+ # New fields
+ assert data['total_pdfs'] == 15
+ assert data['failed_pdfs'] == 3
+ assert data['avg_processing_seconds'] == 45.68 # rounded to 2 decimal places
+ assert len(data['processing_details']) == 2
+ assert data['processing_details'][0]['status'] == 'OCR Processing'
+ assert data['processing_details'][0]['count'] == 1
+ assert len(data['currently_processing']) == 2
+ assert data['currently_processing'][0]['file_name'] == 'scan1.pdf'
+ assert len(data['recent_files']) == 2
+ assert data['recent_files'][0]['file_name'] == 'doc1.pdf'
+ assert data['recent_files'][1]['status'] == 'Failed'
+
+ def test_status_no_data_returns_404(self, client):
+ """Test that 404 is returned when no data exists."""
+ with patch('routes.api.execute_query') as mock_query:
+ mock_query.return_value = None
+ response = client.get('/api/status')
+
+ assert response.status_code == 404
+ data = json.loads(response.data)
+ assert 'error' in data
+
+ def test_status_empty_processing(self, client):
+ """Test response when no documents are currently processing."""
+ summary_result = {
+ 'processed_pdfs': 5,
+ 'processing_pdfs': 0,
+ 'latest_processing_timestamp': None,
+ 'latest_completed_timestamp': '2024-06-01 11:30:00',
+ 'latest_created_name': 'doc.pdf',
+ 'latest_created_status': 5,
+ 'total_pdfs': 5,
+ 'failed_pdfs': 0,
+ 'avg_processing_seconds': 30.0,
+ }
+
+ with patch('routes.api.execute_query') as mock_query:
+ mock_query.side_effect = [
+ summary_result,
+ [], # no processing details
+ [], # no currently processing
+ [{'id': 1, 'file_name': 'a.pdf', 'status': 'Completed', 'status_code': 5, 'created': '2024-06-01 10:00:00', 'completed': '2024-06-01 10:00:30', 'pdf_pages': 1}],
+ ]
+ response = client.get('/api/status')
+ data = json.loads(response.data)
+
+ assert response.status_code == 200
+ assert data['processing_pdfs'] == 0
+ assert data['processing_details'] == []
+ assert data['currently_processing'] == []
+ assert len(data['recent_files']) == 1
+
+ def test_status_null_avg_processing(self, client):
+ """Test response when avg_processing_seconds is None (no completed docs)."""
+ summary_result = {
+ 'processed_pdfs': 0,
+ 'processing_pdfs': 1,
+ 'latest_processing_timestamp': '2024-06-01 12:00:00',
+ 'latest_completed_timestamp': None,
+ 'latest_created_name': 'new.pdf',
+ 'latest_created_status': 1,
+ 'total_pdfs': 1,
+ 'failed_pdfs': 0,
+ 'avg_processing_seconds': None,
+ }
+
+ with patch('routes.api.execute_query') as mock_query:
+ mock_query.side_effect = [
+ summary_result,
+ [{'status': 'Reading Metadata', 'status_code': 1, 'count': 1}],
+ [{'id': 1, 'file_name': 'new.pdf', 'status': 'Reading Metadata', 'status_code': 1, 'created': '2024-06-01 12:00:00', 'pdf_pages': 0}],
+ [],
+ ]
+ response = client.get('/api/status')
+ data = json.loads(response.data)
+
+ assert response.status_code == 200
+ assert data['avg_processing_seconds'] is None
+ assert data['processed_pdfs'] == 0
+ assert len(data['currently_processing']) == 1
+
+ def test_status_database_error(self, client):
+ """Test that database errors return 500."""
+ with patch('routes.api.execute_query') as mock_query:
+ mock_query.side_effect = Exception("Database connection failed")
+ response = client.get('/api/status')
+
+ assert response.status_code == 500
+ data = json.loads(response.data)
+ assert 'error' in data
+
+ def test_status_recent_files_limit(self, client):
+ """Test that recent_files returns at most 5 entries."""
+ summary_result = {
+ 'processed_pdfs': 10,
+ 'processing_pdfs': 0,
+ 'latest_processing_timestamp': None,
+ 'latest_completed_timestamp': '2024-06-01 12:00:00',
+ 'latest_created_name': 'doc10.pdf',
+ 'latest_created_status': 5,
+ 'total_pdfs': 10,
+ 'failed_pdfs': 0,
+ 'avg_processing_seconds': 25.0,
+ }
+
+ # Simulate query returning exactly 5 recent files
+ recent = [
+ {'id': i, 'file_name': f'doc{i}.pdf', 'status': 'Completed', 'status_code': 5,
+ 'created': f'2024-06-01 {10+i}:00:00', 'completed': f'2024-06-01 {10+i}:01:00', 'pdf_pages': i}
+ for i in range(5)
+ ]
+
+ with patch('routes.api.execute_query') as mock_query:
+ mock_query.side_effect = [
+ summary_result,
+ [],
+ [],
+ recent,
+ ]
+ response = client.get('/api/status')
+ data = json.loads(response.data)
+
+ assert response.status_code == 200
+ assert len(data['recent_files']) == 5
+
+ def test_status_processing_details_structure(self, client):
+ """Test the structure of processing_details entries."""
+ summary_result = {
+ 'processed_pdfs': 5,
+ 'processing_pdfs': 3,
+ 'latest_processing_timestamp': '2024-06-01 12:00:00',
+ 'latest_completed_timestamp': '2024-06-01 11:00:00',
+ 'latest_created_name': 'test.pdf',
+ 'latest_created_status': 2,
+ 'total_pdfs': 8,
+ 'failed_pdfs': 0,
+ 'avg_processing_seconds': 40.0,
+ }
+
+ processing_details = [
+ {'status': 'Reading Metadata', 'status_code': 1, 'count': 1},
+ {'status': 'OCR Processing', 'status_code': 2, 'count': 2},
+ ]
+
+ with patch('routes.api.execute_query') as mock_query:
+ mock_query.side_effect = [
+ summary_result,
+ processing_details,
+ [],
+ [],
+ ]
+ response = client.get('/api/status')
+ data = json.loads(response.data)
+
+ assert response.status_code == 200
+ for detail in data['processing_details']:
+ assert 'status' in detail
+ assert 'status_code' in detail
+ assert 'count' in detail
+
+ def test_status_includes_failed_in_recent(self, client):
+ """Test that failed documents appear in recent_files."""
+ summary_result = {
+ 'processed_pdfs': 3,
+ 'processing_pdfs': 0,
+ 'latest_processing_timestamp': None,
+ 'latest_completed_timestamp': '2024-06-01 12:00:00',
+ 'latest_created_name': 'failed.pdf',
+ 'latest_created_status': -1,
+ 'total_pdfs': 5,
+ 'failed_pdfs': 2,
+ 'avg_processing_seconds': 30.0,
+ }
+
+ recent_files = [
+ {'id': 5, 'file_name': 'ok.pdf', 'status': 'Completed', 'status_code': 5,
+ 'created': '2024-06-01 12:00:00', 'completed': '2024-06-01 12:01:00', 'pdf_pages': 2},
+ {'id': 4, 'file_name': 'failed.pdf', 'status': 'Failed', 'status_code': -1,
+ 'created': '2024-06-01 11:00:00', 'completed': '2024-06-01 11:00:05', 'pdf_pages': 0},
+ {'id': 3, 'file_name': 'invalid.pdf', 'status': 'Invalid File', 'status_code': -1,
+ 'created': '2024-06-01 10:00:00', 'completed': '2024-06-01 10:00:01', 'pdf_pages': 0},
+ ]
+
+ with patch('routes.api.execute_query') as mock_query:
+ mock_query.side_effect = [
+ summary_result,
+ [],
+ [],
+ recent_files,
+ ]
+ response = client.get('/api/status')
+ data = json.loads(response.data)
+
+ assert response.status_code == 200
+ assert data['failed_pdfs'] == 2
+ statuses = [f['status'] for f in data['recent_files']]
+ assert 'Failed' in statuses
+ assert 'Invalid File' in statuses
+
+ def test_status_execute_query_returns_none_for_lists(self, client):
+ """Test that None results from list queries are handled gracefully."""
+ summary_result = {
+ 'processed_pdfs': 1,
+ 'processing_pdfs': 0,
+ 'latest_processing_timestamp': None,
+ 'latest_completed_timestamp': '2024-06-01 12:00:00',
+ 'latest_created_name': 'doc.pdf',
+ 'latest_created_status': 5,
+ 'total_pdfs': 1,
+ 'failed_pdfs': 0,
+ 'avg_processing_seconds': 10.0,
+ }
+
+ with patch('routes.api.execute_query') as mock_query:
+ mock_query.side_effect = [
+ summary_result,
+ None, # processing_details returns None
+ None, # currently_processing returns None
+ None, # recent_files returns None
+ ]
+ response = client.get('/api/status')
+ data = json.loads(response.data)
+
+ assert response.status_code == 200
+ assert data['processing_details'] == []
+ assert data['currently_processing'] == []
+ assert data['recent_files'] == []
diff --git a/web_service/src/routes/api.py b/web_service/src/routes/api.py
index e399c53..742a8dc 100644
--- a/web_service/src/routes/api.py
+++ b/web_service/src/routes/api.py
@@ -70,30 +70,74 @@ def delete_openai_settings():
def get_status():
# logger.info("Received request to get status")
try:
- query = """
- SELECT *,
+ # Core summary query (backward compatible)
+ summary_query = """
+ SELECT
(SELECT COUNT(*) FROM scanneddata WHERE status_code = 5) AS processed_pdfs,
(SELECT COUNT(*) FROM scanneddata WHERE status_code BETWEEN 0 AND 4) AS processing_pdfs,
(SELECT DATETIME(created) FROM scanneddata WHERE status_code < 5 ORDER BY created DESC LIMIT 1) AS latest_processing_timestamp,
(SELECT DATETIME(modified) FROM scanneddata WHERE status_code = 5 ORDER BY modified DESC LIMIT 1) AS latest_completed_timestamp,
(SELECT file_name FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_name,
- (SELECT status_code FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_status
- FROM scanneddata
- ORDER BY created DESC, id DESC
+ (SELECT status_code FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_status,
+ (SELECT COUNT(*) FROM scanneddata) AS total_pdfs,
+ (SELECT COUNT(*) FROM scanneddata WHERE status_code < 0) AS failed_pdfs,
+ (SELECT AVG((JULIANDAY(modified) - JULIANDAY(created)) * 86400) FROM scanneddata WHERE status_code = 5) AS avg_processing_seconds
"""
- result = execute_query(query, fetchone=True)
- if result:
- response = {
- 'processed_pdfs': result.get('processed_pdfs', 0),
- 'processing_pdfs': result.get('processing_pdfs', 0),
- 'latest_processing_timestamp': result.get('latest_processing_timestamp', None),
- 'latest_completed_timestamp': result.get('latest_completed_timestamp', None),
- 'latest_created_name': result.get('latest_created_name', None),
- 'latest_created_status': result.get('latest_created_status', None)
- }
- return jsonify(response), 200
- else:
+ result = execute_query(summary_query, fetchone=True)
+ if not result:
return jsonify({'error': 'No data found'}), 404
+
+ # Breakdown of currently processing documents by status
+ processing_details_query = """
+ SELECT file_status AS status, status_code, COUNT(*) AS count
+ FROM scanneddata
+ WHERE status_code BETWEEN 0 AND 4
+ GROUP BY file_status, status_code
+ ORDER BY status_code ASC
+ """
+ processing_details = execute_query(processing_details_query, fetchall=True) or []
+
+ # Currently processing documents (individual items)
+ currently_processing_query = """
+ SELECT id, file_name, file_status AS status, status_code,
+ DATETIME(created) AS created, pdf_pages
+ FROM scanneddata
+ WHERE status_code BETWEEN 0 AND 4
+ ORDER BY created DESC
+ """
+ currently_processing = execute_query(currently_processing_query, fetchall=True) or []
+
+ # Last 5 recently finished files (completed or failed)
+ recent_files_query = """
+ SELECT id, file_name, file_status AS status, status_code,
+ DATETIME(created) AS created, DATETIME(modified) AS completed,
+ pdf_pages
+ FROM scanneddata
+ WHERE status_code = 5 OR status_code < 0
+ ORDER BY modified DESC
+ LIMIT 5
+ """
+ recent_files = execute_query(recent_files_query, fetchall=True) or []
+
+ avg_seconds = result.get('avg_processing_seconds', None)
+
+ response = {
+ # Existing fields (backward compatible)
+ 'processed_pdfs': result.get('processed_pdfs', 0),
+ 'processing_pdfs': result.get('processing_pdfs', 0),
+ 'latest_processing_timestamp': result.get('latest_processing_timestamp', None),
+ 'latest_completed_timestamp': result.get('latest_completed_timestamp', None),
+ 'latest_created_name': result.get('latest_created_name', None),
+ 'latest_created_status': result.get('latest_created_status', None),
+ # New fields
+ 'total_pdfs': result.get('total_pdfs', 0),
+ 'failed_pdfs': result.get('failed_pdfs', 0),
+ 'avg_processing_seconds': round(avg_seconds, 2) if avg_seconds is not None else None,
+ 'processing_details': processing_details,
+ 'currently_processing': currently_processing,
+ 'recent_files': recent_files,
+ }
+ return jsonify(response), 200
except Exception as e:
err = f"Error fetching status: {e}"
logger.exception(err)
From c121056355283ab95e0a9a6bd742c1b8c2f4e8b2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 23 Mar 2026 18:03:30 +0000
Subject: [PATCH 3/9] Add /api/status documentation to README
Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com>
Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/bd224ccb-e72f-459a-81b6-091b19b464d3
---
README.md | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+)
diff --git a/README.md b/README.md
index d7735da..4f59ffe 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,73 @@ For development purposes, you can use the built-in Flask server:
3. Run pytests via the [run-tests.sh](run-tests.sh) script (Spins up a docker [test-service](/test_service/Dockerfile))
+## 📡 API
+
+### `GET /api/status`
+
+Returns aggregated document processing status including per-stage breakdowns, currently processing items, and recent completion history.
+
+**Response fields:**
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `processed_pdfs` | `int` | Count of completed documents |
+| `processing_pdfs` | `int` | Count of in-progress documents |
+| `latest_processing_timestamp` | `string\|null` | Most recent processing update timestamp |
+| `latest_completed_timestamp` | `string\|null` | Most recent completion timestamp |
+| `latest_created_name` | `string\|null` | Filename of the latest document |
+| `latest_created_status` | `int\|null` | Status code of the latest document |
+| `total_pdfs` | `int` | Total document count across all statuses |
+| `failed_pdfs` | `int` | Count of failed documents |
+| `avg_processing_seconds` | `float\|null` | Average processing time for completed documents |
+| `processing_details` | `array` | Breakdown of in-progress documents grouped by status |
+| `currently_processing` | `array` | List of individual documents currently being processed |
+| `recent_files` | `array` | Last 5 completed or failed documents with timestamps |
+
+
+**Example response:**
+
+```json
+{
+ "processed_pdfs": 10,
+ "processing_pdfs": 3,
+ "latest_processing_timestamp": "2024-06-01 12:00:00",
+ "latest_completed_timestamp": "2024-06-01 11:30:00",
+ "latest_created_name": "invoice.pdf",
+ "latest_created_status": 2,
+ "total_pdfs": 15,
+ "failed_pdfs": 2,
+ "avg_processing_seconds": 45.68,
+ "processing_details": [
+ {"status": "OCR Processing", "status_code": 2, "count": 2},
+ {"status": "Reading Metadata", "status_code": 1, "count": 1}
+ ],
+ "currently_processing": [
+ {
+ "id": 12,
+ "file_name": "scan1.pdf",
+ "status": "OCR Processing",
+ "status_code": 2,
+ "created": "2024-06-01 12:00:00",
+ "pdf_pages": 3
+ }
+ ],
+ "recent_files": [
+ {
+ "id": 11,
+ "file_name": "doc1.pdf",
+ "status": "Completed",
+ "status_code": 5,
+ "created": "2024-06-01 10:00:00",
+ "completed": "2024-06-01 10:01:00",
+ "pdf_pages": 2
+ }
+ ]
+}
+```
+
+
+
## 🔮 Upcoming Features
- **Notifications**: Stay informed with real-time updates.
- **OCR Settings**: Take control of OCR settings in the web interface
From 5253604b3dd96371c4c242554c92a0cbfb870151 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Mar 2026 19:11:35 +0000
Subject: [PATCH 4/9] Reduce SQL queries from 4 to 3 by deriving
processing_details from currently_processing in Python
Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com>
Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/4abb078b-a869-4988-be63-1ebc9ce473d5
---
tests/test_status_api.py | 22 +++++-----------------
web_service/src/routes/api.py | 23 ++++++++++++-----------
2 files changed, 17 insertions(+), 28 deletions(-)
diff --git a/tests/test_status_api.py b/tests/test_status_api.py
index 23bd786..d91c356 100644
--- a/tests/test_status_api.py
+++ b/tests/test_status_api.py
@@ -71,7 +71,6 @@ def test_status_returns_backward_compatible_fields(self, client):
with patch('routes.api.execute_query') as mock_query:
mock_query.side_effect = [
summary_result, # summary query
- [], # processing_details query
[], # currently_processing query
[], # recent_files query
]
@@ -100,11 +99,6 @@ def test_status_returns_new_fields(self, client):
'avg_processing_seconds': 45.678,
}
- processing_details = [
- {'status': 'OCR Processing', 'status_code': 2, 'count': 1},
- {'status': 'Syncing', 'status_code': 4, 'count': 1},
- ]
-
currently_processing = [
{'id': 12, 'file_name': 'scan1.pdf', 'status': 'OCR Processing', 'status_code': 2, 'created': '2024-06-01 12:00:00', 'pdf_pages': 3},
{'id': 13, 'file_name': 'scan2.pdf', 'status': 'Syncing', 'status_code': 4, 'created': '2024-06-01 11:55:00', 'pdf_pages': 1},
@@ -118,7 +112,6 @@ def test_status_returns_new_fields(self, client):
with patch('routes.api.execute_query') as mock_query:
mock_query.side_effect = [
summary_result,
- processing_details,
currently_processing,
recent_files,
]
@@ -166,7 +159,6 @@ def test_status_empty_processing(self, client):
with patch('routes.api.execute_query') as mock_query:
mock_query.side_effect = [
summary_result,
- [], # no processing details
[], # no currently processing
[{'id': 1, 'file_name': 'a.pdf', 'status': 'Completed', 'status_code': 5, 'created': '2024-06-01 10:00:00', 'completed': '2024-06-01 10:00:30', 'pdf_pages': 1}],
]
@@ -196,7 +188,6 @@ def test_status_null_avg_processing(self, client):
with patch('routes.api.execute_query') as mock_query:
mock_query.side_effect = [
summary_result,
- [{'status': 'Reading Metadata', 'status_code': 1, 'count': 1}],
[{'id': 1, 'file_name': 'new.pdf', 'status': 'Reading Metadata', 'status_code': 1, 'created': '2024-06-01 12:00:00', 'pdf_pages': 0}],
[],
]
@@ -243,7 +234,6 @@ def test_status_recent_files_limit(self, client):
mock_query.side_effect = [
summary_result,
[],
- [],
recent,
]
response = client.get('/api/status')
@@ -266,16 +256,16 @@ def test_status_processing_details_structure(self, client):
'avg_processing_seconds': 40.0,
}
- processing_details = [
- {'status': 'Reading Metadata', 'status_code': 1, 'count': 1},
- {'status': 'OCR Processing', 'status_code': 2, 'count': 2},
+ currently_processing = [
+ {'id': 1, 'file_name': 'a.pdf', 'status': 'Reading Metadata', 'status_code': 1, 'created': '2024-06-01 12:00:00', 'pdf_pages': 1},
+ {'id': 2, 'file_name': 'b.pdf', 'status': 'OCR Processing', 'status_code': 2, 'created': '2024-06-01 11:59:00', 'pdf_pages': 2},
+ {'id': 3, 'file_name': 'c.pdf', 'status': 'OCR Processing', 'status_code': 2, 'created': '2024-06-01 11:58:00', 'pdf_pages': 3},
]
with patch('routes.api.execute_query') as mock_query:
mock_query.side_effect = [
summary_result,
- processing_details,
- [],
+ currently_processing,
[],
]
response = client.get('/api/status')
@@ -314,7 +304,6 @@ def test_status_includes_failed_in_recent(self, client):
mock_query.side_effect = [
summary_result,
[],
- [],
recent_files,
]
response = client.get('/api/status')
@@ -343,7 +332,6 @@ def test_status_execute_query_returns_none_for_lists(self, client):
with patch('routes.api.execute_query') as mock_query:
mock_query.side_effect = [
summary_result,
- None, # processing_details returns None
None, # currently_processing returns None
None, # recent_files returns None
]
diff --git a/web_service/src/routes/api.py b/web_service/src/routes/api.py
index 742a8dc..b7d3598 100644
--- a/web_service/src/routes/api.py
+++ b/web_service/src/routes/api.py
@@ -87,17 +87,8 @@ def get_status():
if not result:
return jsonify({'error': 'No data found'}), 404
- # Breakdown of currently processing documents by status
- processing_details_query = """
- SELECT file_status AS status, status_code, COUNT(*) AS count
- FROM scanneddata
- WHERE status_code BETWEEN 0 AND 4
- GROUP BY file_status, status_code
- ORDER BY status_code ASC
- """
- processing_details = execute_query(processing_details_query, fetchall=True) or []
-
- # Currently processing documents (individual items)
+ # Currently processing documents (individual items) — also used to
+ # derive processing_details breakdown, avoiding a separate GROUP BY query.
currently_processing_query = """
SELECT id, file_name, file_status AS status, status_code,
DATETIME(created) AS created, pdf_pages
@@ -107,6 +98,16 @@ def get_status():
"""
currently_processing = execute_query(currently_processing_query, fetchall=True) or []
+ # Derive processing_details from currently_processing in Python
+ details_map: dict[tuple[str, int], int] = {}
+ for item in currently_processing:
+ key = (item['status'], item['status_code'])
+ details_map[key] = details_map.get(key, 0) + 1
+ processing_details = sorted(
+ [{'status': s, 'status_code': sc, 'count': c} for (s, sc), c in details_map.items()],
+ key=lambda d: d['status_code'],
+ )
+
# Last 5 recently finished files (completed or failed)
recent_files_query = """
SELECT id, file_name, file_status AS status, status_code,
From f39555a7068458ef844ba39f36fdd187200a883a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Mar 2026 19:22:55 +0000
Subject: [PATCH 5/9] Fix latest_processing_timestamp to exclude deleted/failed
docs (status_code < 0)
Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com>
Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/992614c5-ed42-428c-96b2-256ccf0585c4
---
web_service/src/routes/api.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/web_service/src/routes/api.py b/web_service/src/routes/api.py
index b7d3598..706b8bb 100644
--- a/web_service/src/routes/api.py
+++ b/web_service/src/routes/api.py
@@ -75,7 +75,7 @@ def get_status():
SELECT
(SELECT COUNT(*) FROM scanneddata WHERE status_code = 5) AS processed_pdfs,
(SELECT COUNT(*) FROM scanneddata WHERE status_code BETWEEN 0 AND 4) AS processing_pdfs,
- (SELECT DATETIME(created) FROM scanneddata WHERE status_code < 5 ORDER BY created DESC LIMIT 1) AS latest_processing_timestamp,
+ (SELECT DATETIME(created) FROM scanneddata WHERE status_code BETWEEN 0 AND 4 ORDER BY created DESC LIMIT 1) AS latest_processing_timestamp,
(SELECT DATETIME(modified) FROM scanneddata WHERE status_code = 5 ORDER BY modified DESC LIMIT 1) AS latest_completed_timestamp,
(SELECT file_name FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_name,
(SELECT status_code FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_status,
From 11f442b854d296dbd23b54168338579e010cb603 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Mar 2026 19:39:36 +0000
Subject: [PATCH 6/9] Fix CI: add apt-get upgrade to resolve dpkg package
conflict in test_service Dockerfile
Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com>
Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/ad59c16f-0584-4752-afa2-c81172389beb
---
test_service/Dockerfile | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/test_service/Dockerfile b/test_service/Dockerfile
index decf732..99db41c 100644
--- a/test_service/Dockerfile
+++ b/test_service/Dockerfile
@@ -4,8 +4,9 @@ FROM seleniarm/standalone-chromium:latest
# Switch to the root user to install dependencies
USER root
-# Update package lists and install Python3, pip, and venv
-RUN apt-get update && apt-get install -y python3 python3-pip python3-venv
+# Update package lists, upgrade base packages to resolve potential version
+# conflicts with newer repository packages, then install Python3 toolchain.
+RUN apt-get update && apt-get upgrade -y && apt-get install -y python3 python3-pip python3-venv
# Create a Python virtual environment
RUN python3 -m venv /opt/venv
From 35343b4d049bc4c4258d78e235f5248952151839 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Mar 2026 19:55:25 +0000
Subject: [PATCH 7/9] Fix CI: use --force-overwrite for dpkg to handle openssl
package split conflict
Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com>
Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/5e7d9707-f37f-4978-a7ad-066038fcc966
---
test_service/Dockerfile | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/test_service/Dockerfile b/test_service/Dockerfile
index 99db41c..eed99f5 100644
--- a/test_service/Dockerfile
+++ b/test_service/Dockerfile
@@ -4,9 +4,10 @@ FROM seleniarm/standalone-chromium:latest
# Switch to the root user to install dependencies
USER root
-# Update package lists, upgrade base packages to resolve potential version
-# conflicts with newer repository packages, then install Python3 toolchain.
-RUN apt-get update && apt-get upgrade -y && apt-get install -y python3 python3-pip python3-venv
+# Update package lists and install Python3, pip, and venv.
+# --force-overwrite handles file ownership changes during package splits
+# (e.g. openssl-provider-legacy taking legacy.so from libssl3t64).
+RUN apt-get update && apt-get install -y -o Dpkg::Options::="--force-overwrite" python3 python3-pip python3-venv
# Create a Python virtual environment
RUN python3 -m venv /opt/venv
From 757ab68ae9d51e92e7ad1a882fd25d281e03622e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Mar 2026 20:15:48 +0000
Subject: [PATCH 8/9] Fix web service crash when RabbitMQ unavailable: handle
None return from connect_rabbitmq()
Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com>
Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/a640e2b5-ce42-4e6e-85f4-5790011e0046
---
scansynclib/scansynclib.egg-info/PKG-INFO | 12 ++++++++++++
scansynclib/scansynclib.egg-info/SOURCES.txt | 18 ++++++++++++++++++
.../scansynclib.egg-info/dependency_links.txt | 1 +
scansynclib/scansynclib.egg-info/requires.txt | 7 +++++++
scansynclib/scansynclib.egg-info/top_level.txt | 1 +
web_service/src/main.py | 6 +++++-
6 files changed, 44 insertions(+), 1 deletion(-)
create mode 100644 scansynclib/scansynclib.egg-info/PKG-INFO
create mode 100644 scansynclib/scansynclib.egg-info/SOURCES.txt
create mode 100644 scansynclib/scansynclib.egg-info/dependency_links.txt
create mode 100644 scansynclib/scansynclib.egg-info/requires.txt
create mode 100644 scansynclib/scansynclib.egg-info/top_level.txt
diff --git a/scansynclib/scansynclib.egg-info/PKG-INFO b/scansynclib/scansynclib.egg-info/PKG-INFO
new file mode 100644
index 0000000..28249e6
--- /dev/null
+++ b/scansynclib/scansynclib.egg-info/PKG-INFO
@@ -0,0 +1,12 @@
+Metadata-Version: 2.4
+Name: scansynclib
+Version: 0.1.0
+Summary: Shared helper library for ScanSync
+Requires-Python: >=3.13
+Requires-Dist: colorlog
+Requires-Dist: tenacity
+Requires-Dist: pika
+Requires-Dist: openai
+Requires-Dist: msal
+Requires-Dist: pypdf
+Requires-Dist: redis
diff --git a/scansynclib/scansynclib.egg-info/SOURCES.txt b/scansynclib/scansynclib.egg-info/SOURCES.txt
new file mode 100644
index 0000000..87bba7e
--- /dev/null
+++ b/scansynclib/scansynclib.egg-info/SOURCES.txt
@@ -0,0 +1,18 @@
+pyproject.toml
+./scansynclib/ProcessItem.py
+./scansynclib/__init__.py
+./scansynclib/config.py
+./scansynclib/helpers.py
+./scansynclib/logging.py
+./scansynclib/ollama_helper.py
+./scansynclib/onedrive_api.py
+./scansynclib/onedrive_smb_manager.py
+./scansynclib/openai_helper.py
+./scansynclib/settings.py
+./scansynclib/settings_schema.py
+./scansynclib/sqlite_wrapper.py
+scansynclib.egg-info/PKG-INFO
+scansynclib.egg-info/SOURCES.txt
+scansynclib.egg-info/dependency_links.txt
+scansynclib.egg-info/requires.txt
+scansynclib.egg-info/top_level.txt
\ No newline at end of file
diff --git a/scansynclib/scansynclib.egg-info/dependency_links.txt b/scansynclib/scansynclib.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/scansynclib/scansynclib.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/scansynclib/scansynclib.egg-info/requires.txt b/scansynclib/scansynclib.egg-info/requires.txt
new file mode 100644
index 0000000..8d18428
--- /dev/null
+++ b/scansynclib/scansynclib.egg-info/requires.txt
@@ -0,0 +1,7 @@
+colorlog
+tenacity
+pika
+openai
+msal
+pypdf
+redis
diff --git a/scansynclib/scansynclib.egg-info/top_level.txt b/scansynclib/scansynclib.egg-info/top_level.txt
new file mode 100644
index 0000000..d38122b
--- /dev/null
+++ b/scansynclib/scansynclib.egg-info/top_level.txt
@@ -0,0 +1 @@
+scansynclib
diff --git a/web_service/src/main.py b/web_service/src/main.py
index a274290..b584110 100644
--- a/web_service/src/main.py
+++ b/web_service/src/main.py
@@ -41,7 +41,11 @@ def start_rabbitmq_listener():
def rabbitmq_listener():
logger.info("Started RabbitMQ listener thread.")
- connection, channel = connect_rabbitmq()
+ result = connect_rabbitmq()
+ if result is None:
+ logger.warning("RabbitMQ is not available. SSE updates will be disabled.")
+ return
+ connection, channel = result
# Use fanout as exchange type to broadcast messages to all connected clients
exchange_name = "sse_updates_fanout"
From 0c096b2534ff73998aea6e03b61a2e6c5332f059 Mon Sep 17 00:00:00 2001
From: Maximilian Krause
Date: Tue, 24 Mar 2026 22:36:41 +0100
Subject: [PATCH 9/9] Fix failing pytest suite (service rename, Ollama-dependent test, timeouts)
---
docker-compose.test.yml | 4 ++--
docker-compose.yml | 3 +--
scansynclib/scansynclib/config.json | 2 +-
tests/test_homepage.py | 24 ++++++++++++++----------
web_service/src/routes/settings.py | 4 ++--
5 files changed, 20 insertions(+), 17 deletions(-)
diff --git a/docker-compose.test.yml b/docker-compose.test.yml
index 866e9e6..c5d1b50 100644
--- a/docker-compose.test.yml
+++ b/docker-compose.test.yml
@@ -1,5 +1,5 @@
services:
- web_service:
+ web-service:
build:
context: .
dockerfile: web_service/Dockerfile
@@ -29,7 +29,7 @@ services:
context: .
dockerfile: test_service/Dockerfile
depends_on:
- - web_service
+ - web-service
- redis
networks:
- test-network
diff --git a/docker-compose.yml b/docker-compose.yml
index 7e94ca0..d9f031b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -103,8 +103,7 @@ services:
- rabbitmq
command: ["python", "main.py"]
- web_service:
- image: web_service
+ web-service:
restart: unless-stopped
build:
context: .
diff --git a/scansynclib/scansynclib/config.json b/scansynclib/scansynclib/config.json
index 261cef2..ce89d77 100644
--- a/scansynclib/scansynclib/config.json
+++ b/scansynclib/scansynclib/config.json
@@ -1,5 +1,5 @@
{
- "version": "0.4.0",
+ "version": "0.4.1",
"failedDir": "failed-documents",
"db": {
"path": "data/scansync.db"
diff --git a/tests/test_homepage.py b/tests/test_homepage.py
index 71d0e85..8e2db24 100644
--- a/tests/test_homepage.py
+++ b/tests/test_homepage.py
@@ -23,7 +23,7 @@ def driver():
def test_dashboard_text_first_start(driver):
- driver.get("http://web_service:5001")
+ driver.get("http://web-service:5001")
WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
assert "ScanSync" in driver.title
assert "Get started in three steps:" in driver.page_source
@@ -41,7 +41,7 @@ def test_dashboard_text_first_start(driver):
def test_dashboard_sync_first_start(driver):
- driver.get("http://web_service:5001/sync")
+ driver.get("http://web-service:5001/sync")
WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
assert "ScanSync" in driver.title
assert "Set up or manage your OneDrive connections for syncing." in driver.page_source
@@ -53,7 +53,7 @@ def test_dashboard_sync_first_start(driver):
def test_dashboard_settings_first_start_onedrive(driver):
- driver.get("http://web_service:5001/settings?tab=onedrive-tab")
+ driver.get("http://web-service:5001/settings?tab=onedrive-tab")
WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
assert "ScanSync" in driver.title
assert "Settings" in driver.find_element(By.TAG_NAME, "h1").text
@@ -63,7 +63,7 @@ def test_dashboard_settings_first_start_onedrive(driver):
def test_dashboard_settings_tabs(driver):
- driver.get("http://web_service:5001/settings?tab=ocr-tab")
+ driver.get("http://web-service:5001/settings?tab=ocr-tab")
WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
assert "OCR settings will be available in the future." in driver.page_source
@@ -77,7 +77,7 @@ def test_dashboard_settings_tabs(driver):
def test_dashboard_settings_file_naming_first_start(driver):
- driver.get("http://web_service:5001/settings?tab=file-naming-tab")
+ driver.get("http://web-service:5001/settings?tab=file-naming-tab")
WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
assert "ScanSync" in driver.title
assert "Choose your automatic file naming method:" in driver.page_source
@@ -93,7 +93,7 @@ def test_dashboard_settings_file_naming_first_start(driver):
def test_dashboard_settings_ollama_first_start(driver):
- driver.get("http://web_service:5001/settings?tab=file-naming-tab")
+ driver.get("http://web-service:5001/settings?tab=file-naming-tab")
WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
assert "ScanSync" in driver.title
@@ -115,8 +115,12 @@ def test_dashboard_settings_ollama_first_start(driver):
assert driver.find_element(By.ID, "ollama_server_port").get_attribute("value") == "11434"
driver.find_element(By.ID, "ollama-connect-btn").click()
- WebDriverWait(driver, 10).until(
- EC.visibility_of_element_located((By.ID, "ollama-error"))
+ # Wait for either the error div or the models section to become visible,
+ # depending on whether Ollama is reachable in the test environment.
+ WebDriverWait(driver, 15).until(
+ lambda d: d.find_element(By.ID, "ollama-error").is_displayed()
+ or d.find_element(By.ID, "ollama-models-section").is_displayed()
)
- ollama_error = driver.find_element(By.ID, "ollama-error").text
- assert "Could not connect to Ollama server." in ollama_error
+ error_div = driver.find_element(By.ID, "ollama-error")
+ models_section = driver.find_element(By.ID, "ollama-models-section")
+ assert error_div.is_displayed() or models_section.is_displayed()
diff --git a/web_service/src/routes/settings.py b/web_service/src/routes/settings.py
index 1227fbf..aa150f9 100644
--- a/web_service/src/routes/settings.py
+++ b/web_service/src/routes/settings.py
@@ -154,7 +154,7 @@ def get_ollama_version():
logger.debug(f"Connecting to Ollama server at {scheme}://{url}:{port}/api/version")
try:
full_url = f"{scheme}://{url}:{port}/api/version"
- response = requests.get(full_url, timeout=10)
+ response = requests.get(full_url, timeout=(2, 3))
if response.status_code == 200:
logger.debug(f"Ollama server version response: {response.json()}")
return Response(json.dumps(response.json()), status=200, mimetype='application/json')
@@ -199,7 +199,7 @@ def get_ollama_models():
logger.debug(f"Connecting to Ollama server at {scheme}://{url}:{port}/api/tags")
try:
full_url = f"{scheme}://{url}:{port}/api/tags"
- response = requests.get(full_url, timeout=10)
+ response = requests.get(full_url, timeout=(2, 3))
logger.debug(f"Ollama server models response: {response.status_code} - {response.text}")
if response.status_code == 200:
return Response(json.dumps(response.json()), status=200, mimetype='application/json')