From 35cabee41a41aad211a0f22d505d4e39361cce68 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 23 Mar 2026 17:18:25 +0000 Subject: [PATCH 1/9] Initial plan From e248cd9a88d42e6511cc3ca12c0a350ce4fc5bdf Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 23 Mar 2026 17:27:24 +0000 Subject: [PATCH 2/9] Enhance /api/status endpoint with processing details, recent files, and statistics Add new fields to the status API response: - processing_details: breakdown of processing documents by status - currently_processing: list of documents currently being processed - recent_files: last 5 completed/failed files with timestamps - total_pdfs: total document count - failed_pdfs: failed document count - avg_processing_seconds: average processing time All existing fields are preserved for backward compatibility. Add comprehensive unit tests for the enhanced status endpoint. Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com> Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/51a488f1-8693-4d02-894c-931f85fc9432 --- tests/test_status_api.py | 356 ++++++++++++++++++++++++++++++++++ web_service/src/routes/api.py | 78 ++++++-- 2 files changed, 417 insertions(+), 17 deletions(-) create mode 100644 tests/test_status_api.py diff --git a/tests/test_status_api.py b/tests/test_status_api.py new file mode 100644 index 0000000..23bd786 --- /dev/null +++ b/tests/test_status_api.py @@ -0,0 +1,356 @@ +"""Tests for the enhanced /api/status endpoint.""" + +import json +import pytest +import sys +import os +from unittest.mock import patch, MagicMock + +# Add paths for imports +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../scansynclib')) +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../web_service/src')) + +# Ensure the data directory exists for sqlite_wrapper module-level initialization 
+os.makedirs(os.path.join(os.path.dirname(__file__), '../data'), exist_ok=True) + +# Mock Redis before any scansynclib imports, since settings.py connects at module level +import redis as _real_redis +_orig_from_url = _real_redis.Redis.from_url + + +def _mock_from_url(*args, **kwargs): + mock_client = MagicMock() + mock_client.get.return_value = None # No existing settings in Redis + mock_client.set.return_value = True + mock_client.publish.return_value = 0 + mock_pubsub = MagicMock() + mock_pubsub.subscribe.return_value = None + mock_pubsub.listen.return_value = iter([]) # Empty iterator + mock_client.pubsub.return_value = mock_pubsub + return mock_client + + +_real_redis.Redis.from_url = _mock_from_url + + +@pytest.fixture +def app(): + """Create a Flask test app with the api blueprint.""" + from flask import Flask + from routes.api import api_bp + + app = Flask(__name__) + app.register_blueprint(api_bp) + app.config['TESTING'] = True + return app + + +@pytest.fixture +def client(app): + """Create a Flask test client.""" + return app.test_client() + + +class TestStatusAPI: + """Test cases for the enhanced /api/status endpoint.""" + + def test_status_returns_backward_compatible_fields(self, client): + """Test that all original response fields are still present.""" + summary_result = { + 'processed_pdfs': 10, + 'processing_pdfs': 2, + 'latest_processing_timestamp': '2024-06-01 12:00:00', + 'latest_completed_timestamp': '2024-06-01 11:30:00', + 'latest_created_name': 'invoice.pdf', + 'latest_created_status': 2, + 'total_pdfs': 15, + 'failed_pdfs': 3, + 'avg_processing_seconds': 45.678, + } + + with patch('routes.api.execute_query') as mock_query: + mock_query.side_effect = [ + summary_result, # summary query + [], # processing_details query + [], # currently_processing query + [], # recent_files query + ] + response = client.get('/api/status') + data = json.loads(response.data) + + assert response.status_code == 200 + assert data['processed_pdfs'] == 10 + assert 
data['processing_pdfs'] == 2 + assert data['latest_processing_timestamp'] == '2024-06-01 12:00:00' + assert data['latest_completed_timestamp'] == '2024-06-01 11:30:00' + assert data['latest_created_name'] == 'invoice.pdf' + assert data['latest_created_status'] == 2 + + def test_status_returns_new_fields(self, client): + """Test that all new response fields are present.""" + summary_result = { + 'processed_pdfs': 10, + 'processing_pdfs': 2, + 'latest_processing_timestamp': '2024-06-01 12:00:00', + 'latest_completed_timestamp': '2024-06-01 11:30:00', + 'latest_created_name': 'invoice.pdf', + 'latest_created_status': 2, + 'total_pdfs': 15, + 'failed_pdfs': 3, + 'avg_processing_seconds': 45.678, + } + + processing_details = [ + {'status': 'OCR Processing', 'status_code': 2, 'count': 1}, + {'status': 'Syncing', 'status_code': 4, 'count': 1}, + ] + + currently_processing = [ + {'id': 12, 'file_name': 'scan1.pdf', 'status': 'OCR Processing', 'status_code': 2, 'created': '2024-06-01 12:00:00', 'pdf_pages': 3}, + {'id': 13, 'file_name': 'scan2.pdf', 'status': 'Syncing', 'status_code': 4, 'created': '2024-06-01 11:55:00', 'pdf_pages': 1}, + ] + + recent_files = [ + {'id': 11, 'file_name': 'doc1.pdf', 'status': 'Completed', 'status_code': 5, 'created': '2024-06-01 10:00:00', 'completed': '2024-06-01 10:01:00', 'pdf_pages': 2}, + {'id': 10, 'file_name': 'doc2.pdf', 'status': 'Failed', 'status_code': -1, 'created': '2024-06-01 09:00:00', 'completed': '2024-06-01 09:00:30', 'pdf_pages': 0}, + ] + + with patch('routes.api.execute_query') as mock_query: + mock_query.side_effect = [ + summary_result, + processing_details, + currently_processing, + recent_files, + ] + response = client.get('/api/status') + data = json.loads(response.data) + + assert response.status_code == 200 + # New fields + assert data['total_pdfs'] == 15 + assert data['failed_pdfs'] == 3 + assert data['avg_processing_seconds'] == 45.68 # rounded to 2 decimal places + assert len(data['processing_details']) == 2 + 
assert data['processing_details'][0]['status'] == 'OCR Processing' + assert data['processing_details'][0]['count'] == 1 + assert len(data['currently_processing']) == 2 + assert data['currently_processing'][0]['file_name'] == 'scan1.pdf' + assert len(data['recent_files']) == 2 + assert data['recent_files'][0]['file_name'] == 'doc1.pdf' + assert data['recent_files'][1]['status'] == 'Failed' + + def test_status_no_data_returns_404(self, client): + """Test that 404 is returned when no data exists.""" + with patch('routes.api.execute_query') as mock_query: + mock_query.return_value = None + response = client.get('/api/status') + + assert response.status_code == 404 + data = json.loads(response.data) + assert 'error' in data + + def test_status_empty_processing(self, client): + """Test response when no documents are currently processing.""" + summary_result = { + 'processed_pdfs': 5, + 'processing_pdfs': 0, + 'latest_processing_timestamp': None, + 'latest_completed_timestamp': '2024-06-01 11:30:00', + 'latest_created_name': 'doc.pdf', + 'latest_created_status': 5, + 'total_pdfs': 5, + 'failed_pdfs': 0, + 'avg_processing_seconds': 30.0, + } + + with patch('routes.api.execute_query') as mock_query: + mock_query.side_effect = [ + summary_result, + [], # no processing details + [], # no currently processing + [{'id': 1, 'file_name': 'a.pdf', 'status': 'Completed', 'status_code': 5, 'created': '2024-06-01 10:00:00', 'completed': '2024-06-01 10:00:30', 'pdf_pages': 1}], + ] + response = client.get('/api/status') + data = json.loads(response.data) + + assert response.status_code == 200 + assert data['processing_pdfs'] == 0 + assert data['processing_details'] == [] + assert data['currently_processing'] == [] + assert len(data['recent_files']) == 1 + + def test_status_null_avg_processing(self, client): + """Test response when avg_processing_seconds is None (no completed docs).""" + summary_result = { + 'processed_pdfs': 0, + 'processing_pdfs': 1, + 'latest_processing_timestamp': 
'2024-06-01 12:00:00', + 'latest_completed_timestamp': None, + 'latest_created_name': 'new.pdf', + 'latest_created_status': 1, + 'total_pdfs': 1, + 'failed_pdfs': 0, + 'avg_processing_seconds': None, + } + + with patch('routes.api.execute_query') as mock_query: + mock_query.side_effect = [ + summary_result, + [{'status': 'Reading Metadata', 'status_code': 1, 'count': 1}], + [{'id': 1, 'file_name': 'new.pdf', 'status': 'Reading Metadata', 'status_code': 1, 'created': '2024-06-01 12:00:00', 'pdf_pages': 0}], + [], + ] + response = client.get('/api/status') + data = json.loads(response.data) + + assert response.status_code == 200 + assert data['avg_processing_seconds'] is None + assert data['processed_pdfs'] == 0 + assert len(data['currently_processing']) == 1 + + def test_status_database_error(self, client): + """Test that database errors return 500.""" + with patch('routes.api.execute_query') as mock_query: + mock_query.side_effect = Exception("Database connection failed") + response = client.get('/api/status') + + assert response.status_code == 500 + data = json.loads(response.data) + assert 'error' in data + + def test_status_recent_files_limit(self, client): + """Test that recent_files returns at most 5 entries.""" + summary_result = { + 'processed_pdfs': 10, + 'processing_pdfs': 0, + 'latest_processing_timestamp': None, + 'latest_completed_timestamp': '2024-06-01 12:00:00', + 'latest_created_name': 'doc10.pdf', + 'latest_created_status': 5, + 'total_pdfs': 10, + 'failed_pdfs': 0, + 'avg_processing_seconds': 25.0, + } + + # Simulate query returning exactly 5 recent files + recent = [ + {'id': i, 'file_name': f'doc{i}.pdf', 'status': 'Completed', 'status_code': 5, + 'created': f'2024-06-01 {10+i}:00:00', 'completed': f'2024-06-01 {10+i}:01:00', 'pdf_pages': i} + for i in range(5) + ] + + with patch('routes.api.execute_query') as mock_query: + mock_query.side_effect = [ + summary_result, + [], + [], + recent, + ] + response = client.get('/api/status') + data = 
json.loads(response.data) + + assert response.status_code == 200 + assert len(data['recent_files']) == 5 + + def test_status_processing_details_structure(self, client): + """Test the structure of processing_details entries.""" + summary_result = { + 'processed_pdfs': 5, + 'processing_pdfs': 3, + 'latest_processing_timestamp': '2024-06-01 12:00:00', + 'latest_completed_timestamp': '2024-06-01 11:00:00', + 'latest_created_name': 'test.pdf', + 'latest_created_status': 2, + 'total_pdfs': 8, + 'failed_pdfs': 0, + 'avg_processing_seconds': 40.0, + } + + processing_details = [ + {'status': 'Reading Metadata', 'status_code': 1, 'count': 1}, + {'status': 'OCR Processing', 'status_code': 2, 'count': 2}, + ] + + with patch('routes.api.execute_query') as mock_query: + mock_query.side_effect = [ + summary_result, + processing_details, + [], + [], + ] + response = client.get('/api/status') + data = json.loads(response.data) + + assert response.status_code == 200 + for detail in data['processing_details']: + assert 'status' in detail + assert 'status_code' in detail + assert 'count' in detail + + def test_status_includes_failed_in_recent(self, client): + """Test that failed documents appear in recent_files.""" + summary_result = { + 'processed_pdfs': 3, + 'processing_pdfs': 0, + 'latest_processing_timestamp': None, + 'latest_completed_timestamp': '2024-06-01 12:00:00', + 'latest_created_name': 'failed.pdf', + 'latest_created_status': -1, + 'total_pdfs': 5, + 'failed_pdfs': 2, + 'avg_processing_seconds': 30.0, + } + + recent_files = [ + {'id': 5, 'file_name': 'ok.pdf', 'status': 'Completed', 'status_code': 5, + 'created': '2024-06-01 12:00:00', 'completed': '2024-06-01 12:01:00', 'pdf_pages': 2}, + {'id': 4, 'file_name': 'failed.pdf', 'status': 'Failed', 'status_code': -1, + 'created': '2024-06-01 11:00:00', 'completed': '2024-06-01 11:00:05', 'pdf_pages': 0}, + {'id': 3, 'file_name': 'invalid.pdf', 'status': 'Invalid File', 'status_code': -1, + 'created': '2024-06-01 10:00:00', 
'completed': '2024-06-01 10:00:01', 'pdf_pages': 0}, + ] + + with patch('routes.api.execute_query') as mock_query: + mock_query.side_effect = [ + summary_result, + [], + [], + recent_files, + ] + response = client.get('/api/status') + data = json.loads(response.data) + + assert response.status_code == 200 + assert data['failed_pdfs'] == 2 + statuses = [f['status'] for f in data['recent_files']] + assert 'Failed' in statuses + assert 'Invalid File' in statuses + + def test_status_execute_query_returns_none_for_lists(self, client): + """Test that None results from list queries are handled gracefully.""" + summary_result = { + 'processed_pdfs': 1, + 'processing_pdfs': 0, + 'latest_processing_timestamp': None, + 'latest_completed_timestamp': '2024-06-01 12:00:00', + 'latest_created_name': 'doc.pdf', + 'latest_created_status': 5, + 'total_pdfs': 1, + 'failed_pdfs': 0, + 'avg_processing_seconds': 10.0, + } + + with patch('routes.api.execute_query') as mock_query: + mock_query.side_effect = [ + summary_result, + None, # processing_details returns None + None, # currently_processing returns None + None, # recent_files returns None + ] + response = client.get('/api/status') + data = json.loads(response.data) + + assert response.status_code == 200 + assert data['processing_details'] == [] + assert data['currently_processing'] == [] + assert data['recent_files'] == [] diff --git a/web_service/src/routes/api.py b/web_service/src/routes/api.py index e399c53..742a8dc 100644 --- a/web_service/src/routes/api.py +++ b/web_service/src/routes/api.py @@ -70,30 +70,74 @@ def delete_openai_settings(): def get_status(): # logger.info("Received request to get status") try: - query = """ - SELECT *, + # Core summary query (backward compatible) + summary_query = """ + SELECT (SELECT COUNT(*) FROM scanneddata WHERE status_code = 5) AS processed_pdfs, (SELECT COUNT(*) FROM scanneddata WHERE status_code BETWEEN 0 AND 4) AS processing_pdfs, (SELECT DATETIME(created) FROM scanneddata WHERE 
status_code < 5 ORDER BY created DESC LIMIT 1) AS latest_processing_timestamp, (SELECT DATETIME(modified) FROM scanneddata WHERE status_code = 5 ORDER BY modified DESC LIMIT 1) AS latest_completed_timestamp, (SELECT file_name FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_name, - (SELECT status_code FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_status - FROM scanneddata - ORDER BY created DESC, id DESC + (SELECT status_code FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_status, + (SELECT COUNT(*) FROM scanneddata) AS total_pdfs, + (SELECT COUNT(*) FROM scanneddata WHERE status_code < 0) AS failed_pdfs, + (SELECT AVG((JULIANDAY(modified) - JULIANDAY(created)) * 86400) FROM scanneddata WHERE status_code = 5) AS avg_processing_seconds """ - result = execute_query(query, fetchone=True) - if result: - response = { - 'processed_pdfs': result.get('processed_pdfs', 0), - 'processing_pdfs': result.get('processing_pdfs', 0), - 'latest_processing_timestamp': result.get('latest_processing_timestamp', None), - 'latest_completed_timestamp': result.get('latest_completed_timestamp', None), - 'latest_created_name': result.get('latest_created_name', None), - 'latest_created_status': result.get('latest_created_status', None) - } - return jsonify(response), 200 - else: + result = execute_query(summary_query, fetchone=True) + if not result: return jsonify({'error': 'No data found'}), 404 + + # Breakdown of currently processing documents by status + processing_details_query = """ + SELECT file_status AS status, status_code, COUNT(*) AS count + FROM scanneddata + WHERE status_code BETWEEN 0 AND 4 + GROUP BY file_status, status_code + ORDER BY status_code ASC + """ + processing_details = execute_query(processing_details_query, fetchall=True) or [] + + # Currently processing documents (individual items) + currently_processing_query = """ + SELECT id, file_name, file_status AS status, status_code, + DATETIME(created) AS created, 
pdf_pages + FROM scanneddata + WHERE status_code BETWEEN 0 AND 4 + ORDER BY created DESC + """ + currently_processing = execute_query(currently_processing_query, fetchall=True) or [] + + # Last 5 recently finished files (completed or failed) + recent_files_query = """ + SELECT id, file_name, file_status AS status, status_code, + DATETIME(created) AS created, DATETIME(modified) AS completed, + pdf_pages + FROM scanneddata + WHERE status_code = 5 OR status_code < 0 + ORDER BY modified DESC + LIMIT 5 + """ + recent_files = execute_query(recent_files_query, fetchall=True) or [] + + avg_seconds = result.get('avg_processing_seconds', None) + + response = { + # Existing fields (backward compatible) + 'processed_pdfs': result.get('processed_pdfs', 0), + 'processing_pdfs': result.get('processing_pdfs', 0), + 'latest_processing_timestamp': result.get('latest_processing_timestamp', None), + 'latest_completed_timestamp': result.get('latest_completed_timestamp', None), + 'latest_created_name': result.get('latest_created_name', None), + 'latest_created_status': result.get('latest_created_status', None), + # New fields + 'total_pdfs': result.get('total_pdfs', 0), + 'failed_pdfs': result.get('failed_pdfs', 0), + 'avg_processing_seconds': round(avg_seconds, 2) if avg_seconds is not None else None, + 'processing_details': processing_details, + 'currently_processing': currently_processing, + 'recent_files': recent_files, + } + return jsonify(response), 200 except Exception as e: err = f"Error fetching status: {e}" logger.exception(err) From c121056355283ab95e0a9a6bd742c1b8c2f4e8b2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 23 Mar 2026 18:03:30 +0000 Subject: [PATCH 3/9] Add /api/status documentation to README Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com> Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/bd224ccb-e72f-459a-81b6-091b19b464d3 --- README.md | 67 
+++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/README.md b/README.md index d7735da..4f59ffe 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,73 @@ For development purposes, you can use the built-in Flask server: 3. Run pytests via the [run-tests.sh](run-tests.sh) script (Spins up a docker [test-service](/test_service/Dockerfile)) +## 📡 API + +### `GET /api/status` + +Returns aggregated document processing status including per-stage breakdowns, currently processing items, and recent completion history. + +**Response fields:** + +| Field | Type | Description | +|-------|------|-------------| +| `processed_pdfs` | `int` | Count of completed documents | +| `processing_pdfs` | `int` | Count of in-progress documents | +| `latest_processing_timestamp` | `string\|null` | Most recent processing update timestamp | +| `latest_completed_timestamp` | `string\|null` | Most recent completion timestamp | +| `latest_created_name` | `string\|null` | Filename of the latest document | +| `latest_created_status` | `int\|null` | Status code of the latest document | +| `total_pdfs` | `int` | Total document count across all statuses | +| `failed_pdfs` | `int` | Count of failed documents | +| `avg_processing_seconds` | `float\|null` | Average processing time for completed documents | +| `processing_details` | `array` | Breakdown of in-progress documents grouped by status | +| `currently_processing` | `array` | List of individual documents currently being processed | +| `recent_files` | `array` | Last 5 completed or failed documents with timestamps | + +
+Example response + +```json +{ + "processed_pdfs": 10, + "processing_pdfs": 3, + "latest_processing_timestamp": "2024-06-01 12:00:00", + "latest_completed_timestamp": "2024-06-01 11:30:00", + "latest_created_name": "invoice.pdf", + "latest_created_status": 2, + "total_pdfs": 15, + "failed_pdfs": 2, + "avg_processing_seconds": 45.68, + "processing_details": [ + {"status": "Reading Metadata", "status_code": 1, "count": 1}, + {"status": "OCR Processing", "status_code": 2, "count": 2} + ], + "currently_processing": [ + { + "id": 12, + "file_name": "scan1.pdf", + "status": "OCR Processing", + "status_code": 2, + "created": "2024-06-01 12:00:00", + "pdf_pages": 3 + } + ], + "recent_files": [ + { + "id": 11, + "file_name": "doc1.pdf", + "status": "Completed", + "status_code": 5, + "created": "2024-06-01 10:00:00", + "completed": "2024-06-01 10:01:00", + "pdf_pages": 2 + } + ] +} +``` + +
+ ## 🔮 Upcoming Features - **Notifications**: Stay informed with real-time updates. - **OCR Settings**: Take control of OCR settings in the web interface From 5253604b3dd96371c4c242554c92a0cbfb870151 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 19:11:35 +0000 Subject: [PATCH 4/9] Reduce SQL queries from 4 to 3 by deriving processing_details from currently_processing in Python Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com> Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/4abb078b-a869-4988-be63-1ebc9ce473d5 --- tests/test_status_api.py | 22 +++++----------------- web_service/src/routes/api.py | 23 ++++++++++++----------- 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/tests/test_status_api.py b/tests/test_status_api.py index 23bd786..d91c356 100644 --- a/tests/test_status_api.py +++ b/tests/test_status_api.py @@ -71,7 +71,6 @@ def test_status_returns_backward_compatible_fields(self, client): with patch('routes.api.execute_query') as mock_query: mock_query.side_effect = [ summary_result, # summary query - [], # processing_details query [], # currently_processing query [], # recent_files query ] @@ -100,11 +99,6 @@ def test_status_returns_new_fields(self, client): 'avg_processing_seconds': 45.678, } - processing_details = [ - {'status': 'OCR Processing', 'status_code': 2, 'count': 1}, - {'status': 'Syncing', 'status_code': 4, 'count': 1}, - ] - currently_processing = [ {'id': 12, 'file_name': 'scan1.pdf', 'status': 'OCR Processing', 'status_code': 2, 'created': '2024-06-01 12:00:00', 'pdf_pages': 3}, {'id': 13, 'file_name': 'scan2.pdf', 'status': 'Syncing', 'status_code': 4, 'created': '2024-06-01 11:55:00', 'pdf_pages': 1}, @@ -118,7 +112,6 @@ def test_status_returns_new_fields(self, client): with patch('routes.api.execute_query') as mock_query: mock_query.side_effect = [ summary_result, - processing_details, currently_processing, recent_files, 
] @@ -166,7 +159,6 @@ def test_status_empty_processing(self, client): with patch('routes.api.execute_query') as mock_query: mock_query.side_effect = [ summary_result, - [], # no processing details [], # no currently processing [{'id': 1, 'file_name': 'a.pdf', 'status': 'Completed', 'status_code': 5, 'created': '2024-06-01 10:00:00', 'completed': '2024-06-01 10:00:30', 'pdf_pages': 1}], ] @@ -196,7 +188,6 @@ def test_status_null_avg_processing(self, client): with patch('routes.api.execute_query') as mock_query: mock_query.side_effect = [ summary_result, - [{'status': 'Reading Metadata', 'status_code': 1, 'count': 1}], [{'id': 1, 'file_name': 'new.pdf', 'status': 'Reading Metadata', 'status_code': 1, 'created': '2024-06-01 12:00:00', 'pdf_pages': 0}], [], ] @@ -243,7 +234,6 @@ def test_status_recent_files_limit(self, client): mock_query.side_effect = [ summary_result, [], - [], recent, ] response = client.get('/api/status') @@ -266,16 +256,16 @@ def test_status_processing_details_structure(self, client): 'avg_processing_seconds': 40.0, } - processing_details = [ - {'status': 'Reading Metadata', 'status_code': 1, 'count': 1}, - {'status': 'OCR Processing', 'status_code': 2, 'count': 2}, + currently_processing = [ + {'id': 1, 'file_name': 'a.pdf', 'status': 'Reading Metadata', 'status_code': 1, 'created': '2024-06-01 12:00:00', 'pdf_pages': 1}, + {'id': 2, 'file_name': 'b.pdf', 'status': 'OCR Processing', 'status_code': 2, 'created': '2024-06-01 11:59:00', 'pdf_pages': 2}, + {'id': 3, 'file_name': 'c.pdf', 'status': 'OCR Processing', 'status_code': 2, 'created': '2024-06-01 11:58:00', 'pdf_pages': 3}, ] with patch('routes.api.execute_query') as mock_query: mock_query.side_effect = [ summary_result, - processing_details, - [], + currently_processing, [], ] response = client.get('/api/status') @@ -314,7 +304,6 @@ def test_status_includes_failed_in_recent(self, client): mock_query.side_effect = [ summary_result, [], - [], recent_files, ] response = 
client.get('/api/status') @@ -343,7 +332,6 @@ def test_status_execute_query_returns_none_for_lists(self, client): with patch('routes.api.execute_query') as mock_query: mock_query.side_effect = [ summary_result, - None, # processing_details returns None None, # currently_processing returns None None, # recent_files returns None ] diff --git a/web_service/src/routes/api.py b/web_service/src/routes/api.py index 742a8dc..b7d3598 100644 --- a/web_service/src/routes/api.py +++ b/web_service/src/routes/api.py @@ -87,17 +87,8 @@ def get_status(): if not result: return jsonify({'error': 'No data found'}), 404 - # Breakdown of currently processing documents by status - processing_details_query = """ - SELECT file_status AS status, status_code, COUNT(*) AS count - FROM scanneddata - WHERE status_code BETWEEN 0 AND 4 - GROUP BY file_status, status_code - ORDER BY status_code ASC - """ - processing_details = execute_query(processing_details_query, fetchall=True) or [] - - # Currently processing documents (individual items) + # Currently processing documents (individual items) — also used to + # derive processing_details breakdown, avoiding a separate GROUP BY query. 
currently_processing_query = """ SELECT id, file_name, file_status AS status, status_code, DATETIME(created) AS created, pdf_pages @@ -107,6 +98,16 @@ def get_status(): """ currently_processing = execute_query(currently_processing_query, fetchall=True) or [] + # Derive processing_details from currently_processing in Python + details_map: dict[tuple[str, int], int] = {} + for item in currently_processing: + key = (item['status'], item['status_code']) + details_map[key] = details_map.get(key, 0) + 1 + processing_details = sorted( + [{'status': s, 'status_code': sc, 'count': c} for (s, sc), c in details_map.items()], + key=lambda d: d['status_code'], + ) + # Last 5 recently finished files (completed or failed) recent_files_query = """ SELECT id, file_name, file_status AS status, status_code, From f39555a7068458ef844ba39f36fdd187200a883a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 19:22:55 +0000 Subject: [PATCH 5/9] Fix latest_processing_timestamp to exclude deleted/failed docs (status_code < 0) Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com> Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/992614c5-ed42-428c-96b2-256ccf0585c4 --- web_service/src/routes/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web_service/src/routes/api.py b/web_service/src/routes/api.py index b7d3598..706b8bb 100644 --- a/web_service/src/routes/api.py +++ b/web_service/src/routes/api.py @@ -75,7 +75,7 @@ def get_status(): SELECT (SELECT COUNT(*) FROM scanneddata WHERE status_code = 5) AS processed_pdfs, (SELECT COUNT(*) FROM scanneddata WHERE status_code BETWEEN 0 AND 4) AS processing_pdfs, - (SELECT DATETIME(created) FROM scanneddata WHERE status_code < 5 ORDER BY created DESC LIMIT 1) AS latest_processing_timestamp, + (SELECT DATETIME(created) FROM scanneddata WHERE status_code BETWEEN 0 AND 4 ORDER BY created DESC LIMIT 1) AS latest_processing_timestamp, 
(SELECT DATETIME(modified) FROM scanneddata WHERE status_code = 5 ORDER BY modified DESC LIMIT 1) AS latest_completed_timestamp, (SELECT file_name FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_name, (SELECT status_code FROM scanneddata ORDER BY created DESC LIMIT 1) AS latest_created_status, From 11f442b854d296dbd23b54168338579e010cb603 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 19:39:36 +0000 Subject: [PATCH 6/9] Fix CI: add apt-get upgrade to resolve dpkg package conflict in test_service Dockerfile Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com> Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/ad59c16f-0584-4752-afa2-c81172389beb --- test_service/Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test_service/Dockerfile b/test_service/Dockerfile index decf732..99db41c 100644 --- a/test_service/Dockerfile +++ b/test_service/Dockerfile @@ -4,8 +4,9 @@ FROM seleniarm/standalone-chromium:latest # Switch to the root user to install dependencies USER root -# Update package lists and install Python3, pip, and venv -RUN apt-get update && apt-get install -y python3 python3-pip python3-venv +# Update package lists, upgrade base packages to resolve potential version +# conflicts with newer repository packages, then install Python3 toolchain. 
+RUN apt-get update && apt-get upgrade -y && apt-get install -y python3 python3-pip python3-venv # Create a Python virtual environment RUN python3 -m venv /opt/venv From 35343b4d049bc4c4258d78e235f5248952151839 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 19:55:25 +0000 Subject: [PATCH 7/9] Fix CI: use --force-overwrite for dpkg to handle openssl package split conflict Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com> Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/5e7d9707-f37f-4978-a7ad-066038fcc966 --- test_service/Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test_service/Dockerfile b/test_service/Dockerfile index 99db41c..eed99f5 100644 --- a/test_service/Dockerfile +++ b/test_service/Dockerfile @@ -4,9 +4,10 @@ FROM seleniarm/standalone-chromium:latest # Switch to the root user to install dependencies USER root -# Update package lists, upgrade base packages to resolve potential version -# conflicts with newer repository packages, then install Python3 toolchain. -RUN apt-get update && apt-get upgrade -y && apt-get install -y python3 python3-pip python3-venv +# Update package lists and install Python3, pip, and venv. +# --force-overwrite handles file ownership changes during package splits +# (e.g. openssl-provider-legacy taking legacy.so from libssl3t64). 
+RUN apt-get update && apt-get install -y -o Dpkg::Options::="--force-overwrite" python3 python3-pip python3-venv # Create a Python virtual environment RUN python3 -m venv /opt/venv From 757ab68ae9d51e92e7ad1a882fd25d281e03622e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 20:15:48 +0000 Subject: [PATCH 8/9] Fix web service crash when RabbitMQ unavailable: handle None return from connect_rabbitmq() Co-authored-by: maxi07 <7480270+maxi07@users.noreply.github.com> Agent-Logs-Url: https://github.com/maxi07/ScanSync/sessions/a640e2b5-ce42-4e6e-85f4-5790011e0046 --- scansynclib/scansynclib.egg-info/PKG-INFO | 12 ++++++++++++ scansynclib/scansynclib.egg-info/SOURCES.txt | 18 ++++++++++++++++++ .../scansynclib.egg-info/dependency_links.txt | 1 + scansynclib/scansynclib.egg-info/requires.txt | 7 +++++++ scansynclib/scansynclib.egg-info/top_level.txt | 1 + web_service/src/main.py | 6 +++++- 6 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 scansynclib/scansynclib.egg-info/PKG-INFO create mode 100644 scansynclib/scansynclib.egg-info/SOURCES.txt create mode 100644 scansynclib/scansynclib.egg-info/dependency_links.txt create mode 100644 scansynclib/scansynclib.egg-info/requires.txt create mode 100644 scansynclib/scansynclib.egg-info/top_level.txt diff --git a/scansynclib/scansynclib.egg-info/PKG-INFO b/scansynclib/scansynclib.egg-info/PKG-INFO new file mode 100644 index 0000000..28249e6 --- /dev/null +++ b/scansynclib/scansynclib.egg-info/PKG-INFO @@ -0,0 +1,12 @@ +Metadata-Version: 2.4 +Name: scansynclib +Version: 0.1.0 +Summary: Shared helper library for ScanSync +Requires-Python: >=3.13 +Requires-Dist: colorlog +Requires-Dist: tenacity +Requires-Dist: pika +Requires-Dist: openai +Requires-Dist: msal +Requires-Dist: pypdf +Requires-Dist: redis diff --git a/scansynclib/scansynclib.egg-info/SOURCES.txt b/scansynclib/scansynclib.egg-info/SOURCES.txt new file mode 100644 index 
0000000..87bba7e --- /dev/null +++ b/scansynclib/scansynclib.egg-info/SOURCES.txt @@ -0,0 +1,18 @@ +pyproject.toml +./scansynclib/ProcessItem.py +./scansynclib/__init__.py +./scansynclib/config.py +./scansynclib/helpers.py +./scansynclib/logging.py +./scansynclib/ollama_helper.py +./scansynclib/onedrive_api.py +./scansynclib/onedrive_smb_manager.py +./scansynclib/openai_helper.py +./scansynclib/settings.py +./scansynclib/settings_schema.py +./scansynclib/sqlite_wrapper.py +scansynclib.egg-info/PKG-INFO +scansynclib.egg-info/SOURCES.txt +scansynclib.egg-info/dependency_links.txt +scansynclib.egg-info/requires.txt +scansynclib.egg-info/top_level.txt \ No newline at end of file diff --git a/scansynclib/scansynclib.egg-info/dependency_links.txt b/scansynclib/scansynclib.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/scansynclib/scansynclib.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/scansynclib/scansynclib.egg-info/requires.txt b/scansynclib/scansynclib.egg-info/requires.txt new file mode 100644 index 0000000..8d18428 --- /dev/null +++ b/scansynclib/scansynclib.egg-info/requires.txt @@ -0,0 +1,7 @@ +colorlog +tenacity +pika +openai +msal +pypdf +redis diff --git a/scansynclib/scansynclib.egg-info/top_level.txt b/scansynclib/scansynclib.egg-info/top_level.txt new file mode 100644 index 0000000..d38122b --- /dev/null +++ b/scansynclib/scansynclib.egg-info/top_level.txt @@ -0,0 +1 @@ +scansynclib diff --git a/web_service/src/main.py b/web_service/src/main.py index a274290..b584110 100644 --- a/web_service/src/main.py +++ b/web_service/src/main.py @@ -41,7 +41,11 @@ def start_rabbitmq_listener(): def rabbitmq_listener(): logger.info("Started RabbitMQ listener thread.") - connection, channel = connect_rabbitmq() + result = connect_rabbitmq() + if result is None: + logger.warning("RabbitMQ is not available. 
SSE updates will be disabled.") + return + connection, channel = result # Use fanout as exchange type to broadcast messages to all connected clients exchange_name = "sse_updates_fanout" From 0c096b2534ff73998aea6e03b61a2e6c5332f059 Mon Sep 17 00:00:00 2001 From: Maximilian Krause Date: Tue, 24 Mar 2026 22:36:41 +0100 Subject: [PATCH 9/9] fix pytests --- docker-compose.test.yml | 4 ++-- docker-compose.yml | 3 +-- scansynclib/scansynclib/config.json | 2 +- tests/test_homepage.py | 24 ++++++++++++++---------- web_service/src/routes/settings.py | 4 ++-- 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 866e9e6..c5d1b50 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -1,5 +1,5 @@ services: - web_service: + web-service: build: context: . dockerfile: web_service/Dockerfile @@ -29,7 +29,7 @@ services: context: . dockerfile: test_service/Dockerfile depends_on: - - web_service + - web-service - redis networks: - test-network diff --git a/docker-compose.yml b/docker-compose.yml index 7e94ca0..d9f031b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -103,8 +103,7 @@ services: - rabbitmq command: ["python", "main.py"] - web_service: - image: web_service + web-service: restart: unless-stopped build: context: . 
diff --git a/scansynclib/scansynclib/config.json b/scansynclib/scansynclib/config.json index 261cef2..ce89d77 100644 --- a/scansynclib/scansynclib/config.json +++ b/scansynclib/scansynclib/config.json @@ -1,5 +1,5 @@ { - "version": "0.4.0", + "version": "0.4.1", "failedDir": "failed-documents", "db": { "path": "data/scansync.db" diff --git a/tests/test_homepage.py b/tests/test_homepage.py index 71d0e85..8e2db24 100644 --- a/tests/test_homepage.py +++ b/tests/test_homepage.py @@ -23,7 +23,7 @@ def driver(): def test_dashboard_text_first_start(driver): - driver.get("http://web_service:5001") + driver.get("http://web-service:5001") WebDriverWait(driver, 10).until(EC.title_contains("ScanSync")) assert "ScanSync" in driver.title assert "Get started in three steps:" in driver.page_source @@ -41,7 +41,7 @@ def test_dashboard_text_first_start(driver): def test_dashboard_sync_first_start(driver): - driver.get("http://web_service:5001/sync") + driver.get("http://web-service:5001/sync") WebDriverWait(driver, 10).until(EC.title_contains("ScanSync")) assert "ScanSync" in driver.title assert "Set up or manage your OneDrive connections for syncing." in driver.page_source @@ -53,7 +53,7 @@ def test_dashboard_sync_first_start(driver): def test_dashboard_settings_first_start_onedrive(driver): - driver.get("http://web_service:5001/settings?tab=onedrive-tab") + driver.get("http://web-service:5001/settings?tab=onedrive-tab") WebDriverWait(driver, 10).until(EC.title_contains("ScanSync")) assert "ScanSync" in driver.title assert "Settings" in driver.find_element(By.TAG_NAME, "h1").text @@ -63,7 +63,7 @@ def test_dashboard_settings_first_start_onedrive(driver): def test_dashboard_settings_tabs(driver): - driver.get("http://web_service:5001/settings?tab=ocr-tab") + driver.get("http://web-service:5001/settings?tab=ocr-tab") WebDriverWait(driver, 10).until(EC.title_contains("ScanSync")) assert "OCR settings will be available in the future." 
in driver.page_source @@ -77,7 +77,7 @@ def test_dashboard_settings_tabs(driver): def test_dashboard_settings_file_naming_first_start(driver): - driver.get("http://web_service:5001/settings?tab=file-naming-tab") + driver.get("http://web-service:5001/settings?tab=file-naming-tab") WebDriverWait(driver, 10).until(EC.title_contains("ScanSync")) assert "ScanSync" in driver.title assert "Choose your automatic file naming method:" in driver.page_source @@ -93,7 +93,7 @@ def test_dashboard_settings_file_naming_first_start(driver): def test_dashboard_settings_ollama_first_start(driver): - driver.get("http://web_service:5001/settings?tab=file-naming-tab") + driver.get("http://web-service:5001/settings?tab=file-naming-tab") WebDriverWait(driver, 10).until(EC.title_contains("ScanSync")) assert "ScanSync" in driver.title @@ -115,8 +115,12 @@ def test_dashboard_settings_ollama_first_start(driver): assert driver.find_element(By.ID, "ollama_server_port").get_attribute("value") == "11434" driver.find_element(By.ID, "ollama-connect-btn").click() - WebDriverWait(driver, 10).until( - EC.visibility_of_element_located((By.ID, "ollama-error")) + # Wait for either the error div or the models section to become visible, + # depending on whether Ollama is reachable in the test environment. + WebDriverWait(driver, 15).until( + lambda d: d.find_element(By.ID, "ollama-error").is_displayed() + or d.find_element(By.ID, "ollama-models-section").is_displayed() ) - ollama_error = driver.find_element(By.ID, "ollama-error").text - assert "Could not connect to Ollama server." 
in ollama_error + error_div = driver.find_element(By.ID, "ollama-error") + models_section = driver.find_element(By.ID, "ollama-models-section") + assert error_div.is_displayed() or models_section.is_displayed() diff --git a/web_service/src/routes/settings.py b/web_service/src/routes/settings.py index 1227fbf..aa150f9 100644 --- a/web_service/src/routes/settings.py +++ b/web_service/src/routes/settings.py @@ -154,7 +154,7 @@ def get_ollama_version(): logger.debug(f"Connecting to Ollama server at {scheme}://{url}:{port}/api/version") try: full_url = f"{scheme}://{url}:{port}/api/version" - response = requests.get(full_url, timeout=10) + response = requests.get(full_url, timeout=(2, 3)) if response.status_code == 200: logger.debug(f"Ollama server version response: {response.json()}") return Response(json.dumps(response.json()), status=200, mimetype='application/json') @@ -199,7 +199,7 @@ def get_ollama_models(): logger.debug(f"Connecting to Ollama server at {scheme}://{url}:{port}/api/tags") try: full_url = f"{scheme}://{url}:{port}/api/tags" - response = requests.get(full_url, timeout=10) + response = requests.get(full_url, timeout=(2, 3)) logger.debug(f"Ollama server models response: {response.status_code} - {response.text}") if response.status_code == 200: return Response(json.dumps(response.json()), status=200, mimetype='application/json')