Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,73 @@ For development purposes, you can use the built-in Flask server:
3. Run the pytest suite via the [run-tests.sh](run-tests.sh) script (it spins up a Docker [test-service](/test_service/Dockerfile) container)


## 📡 API

### `GET /api/status`

Returns aggregated document processing status including per-stage breakdowns, currently processing items, and recent completion history.

**Response fields:**

| Field | Type | Description |
|-------|------|-------------|
| `processed_pdfs` | `int` | Count of completed documents |
| `processing_pdfs` | `int` | Count of in-progress documents |
| `latest_processing_timestamp` | `string\|null` | Most recent processing update timestamp |
| `latest_completed_timestamp` | `string\|null` | Most recent completion timestamp |
| `latest_created_name` | `string\|null` | Filename of the most recently created document |
| `latest_created_status` | `int\|null` | Status code of the most recently created document |
| `total_pdfs` | `int` | Total document count across all statuses |
| `failed_pdfs` | `int` | Count of failed documents |
| `avg_processing_seconds` | `float\|null` | Average processing time for completed documents |
| `processing_details` | `array` | Breakdown of in-progress documents grouped by status |
| `currently_processing` | `array` | List of individual documents currently being processed |
| `recent_files` | `array` | Last 5 completed or failed documents with timestamps |

<details>
<summary>Example response</summary>

```json
{
"processed_pdfs": 10,
"processing_pdfs": 3,
"latest_processing_timestamp": "2024-06-01 12:00:00",
"latest_completed_timestamp": "2024-06-01 11:30:00",
"latest_created_name": "invoice.pdf",
"latest_created_status": 2,
"total_pdfs": 15,
"failed_pdfs": 2,
"avg_processing_seconds": 45.68,
"processing_details": [
{"status": "OCR Processing", "status_code": 2, "count": 2},
{"status": "Reading Metadata", "status_code": 1, "count": 1}
],
"currently_processing": [
{
"id": 12,
"file_name": "scan1.pdf",
"status": "OCR Processing",
"status_code": 2,
"created": "2024-06-01 12:00:00",
"pdf_pages": 3
}
],
"recent_files": [
{
"id": 11,
"file_name": "doc1.pdf",
"status": "Completed",
"status_code": 5,
"created": "2024-06-01 10:00:00",
"completed": "2024-06-01 10:01:00",
"pdf_pages": 2
}
]
}
```

</details>

## 🔮 Upcoming Features
- **Notifications**: Stay informed with real-time updates.
- **OCR Settings**: Take control of OCR settings in the web interface.
Expand Down
4 changes: 2 additions & 2 deletions docker-compose.test.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
services:
web_service:
web-service:
build:
context: .
dockerfile: web_service/Dockerfile
Expand Down Expand Up @@ -29,7 +29,7 @@ services:
context: .
dockerfile: test_service/Dockerfile
depends_on:
- web_service
- web-service
- redis
networks:
- test-network
Expand Down
3 changes: 1 addition & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,7 @@ services:
- rabbitmq
command: ["python", "main.py"]

web_service:
image: web_service
web-service:
restart: unless-stopped
build:
context: .
Expand Down
12 changes: 12 additions & 0 deletions scansynclib/scansynclib.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Metadata-Version: 2.4
Name: scansynclib
Version: 0.1.0
Summary: Shared helper library for ScanSync
Requires-Python: >=3.13
Requires-Dist: colorlog
Requires-Dist: tenacity
Requires-Dist: pika
Requires-Dist: openai
Requires-Dist: msal
Requires-Dist: pypdf
Requires-Dist: redis
18 changes: 18 additions & 0 deletions scansynclib/scansynclib.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
pyproject.toml
./scansynclib/ProcessItem.py
./scansynclib/__init__.py
./scansynclib/config.py
./scansynclib/helpers.py
./scansynclib/logging.py
./scansynclib/ollama_helper.py
./scansynclib/onedrive_api.py
./scansynclib/onedrive_smb_manager.py
./scansynclib/openai_helper.py
./scansynclib/settings.py
./scansynclib/settings_schema.py
./scansynclib/sqlite_wrapper.py
scansynclib.egg-info/PKG-INFO
scansynclib.egg-info/SOURCES.txt
scansynclib.egg-info/dependency_links.txt
scansynclib.egg-info/requires.txt
scansynclib.egg-info/top_level.txt
1 change: 1 addition & 0 deletions scansynclib/scansynclib.egg-info/dependency_links.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

7 changes: 7 additions & 0 deletions scansynclib/scansynclib.egg-info/requires.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
colorlog
tenacity
pika
openai
msal
pypdf
redis
1 change: 1 addition & 0 deletions scansynclib/scansynclib.egg-info/top_level.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
scansynclib
2 changes: 1 addition & 1 deletion scansynclib/scansynclib/config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "0.4.0",
"version": "0.4.1",
"failedDir": "failed-documents",
"db": {
"path": "data/scansync.db"
Expand Down
6 changes: 4 additions & 2 deletions test_service/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ FROM seleniarm/standalone-chromium:latest
# Switch to the root user to install dependencies
USER root

# Update package lists and install Python3, pip, and venv
RUN apt-get update && apt-get install -y python3 python3-pip python3-venv
# Update package lists and install Python3, pip, and venv.
# --force-overwrite handles file ownership changes during package splits
# (e.g. openssl-provider-legacy taking legacy.so from libssl3t64).
RUN apt-get update && apt-get install -y -o Dpkg::Options::="--force-overwrite" python3 python3-pip python3-venv

# Create a Python virtual environment
RUN python3 -m venv /opt/venv
Expand Down
24 changes: 14 additions & 10 deletions tests/test_homepage.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def driver():


def test_dashboard_text_first_start(driver):
driver.get("http://web_service:5001")
driver.get("http://web-service:5001")
WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
assert "ScanSync" in driver.title
assert "Get started in three steps:" in driver.page_source
Expand All @@ -41,7 +41,7 @@ def test_dashboard_text_first_start(driver):


def test_dashboard_sync_first_start(driver):
driver.get("http://web_service:5001/sync")
driver.get("http://web-service:5001/sync")
WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
assert "ScanSync" in driver.title
assert "Set up or manage your OneDrive connections for syncing." in driver.page_source
Expand All @@ -53,7 +53,7 @@ def test_dashboard_sync_first_start(driver):


def test_dashboard_settings_first_start_onedrive(driver):
driver.get("http://web_service:5001/settings?tab=onedrive-tab")
driver.get("http://web-service:5001/settings?tab=onedrive-tab")
WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
assert "ScanSync" in driver.title
assert "Settings" in driver.find_element(By.TAG_NAME, "h1").text
Expand All @@ -63,7 +63,7 @@ def test_dashboard_settings_first_start_onedrive(driver):


def test_dashboard_settings_tabs(driver):
driver.get("http://web_service:5001/settings?tab=ocr-tab")
driver.get("http://web-service:5001/settings?tab=ocr-tab")
WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))

assert "OCR settings will be available in the future." in driver.page_source
Expand All @@ -77,7 +77,7 @@ def test_dashboard_settings_tabs(driver):


def test_dashboard_settings_file_naming_first_start(driver):
driver.get("http://web_service:5001/settings?tab=file-naming-tab")
driver.get("http://web-service:5001/settings?tab=file-naming-tab")
WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
assert "ScanSync" in driver.title
assert "Choose your automatic file naming method:" in driver.page_source
Expand All @@ -93,7 +93,7 @@ def test_dashboard_settings_file_naming_first_start(driver):


def test_dashboard_settings_ollama_first_start(driver):
driver.get("http://web_service:5001/settings?tab=file-naming-tab")
driver.get("http://web-service:5001/settings?tab=file-naming-tab")
WebDriverWait(driver, 10).until(EC.title_contains("ScanSync"))
assert "ScanSync" in driver.title

Expand All @@ -115,8 +115,12 @@ def test_dashboard_settings_ollama_first_start(driver):
assert driver.find_element(By.ID, "ollama_server_port").get_attribute("value") == "11434"

driver.find_element(By.ID, "ollama-connect-btn").click()
WebDriverWait(driver, 10).until(
EC.visibility_of_element_located((By.ID, "ollama-error"))
# Wait for either the error div or the models section to become visible,
# depending on whether Ollama is reachable in the test environment.
WebDriverWait(driver, 15).until(
lambda d: d.find_element(By.ID, "ollama-error").is_displayed()
or d.find_element(By.ID, "ollama-models-section").is_displayed()
)
ollama_error = driver.find_element(By.ID, "ollama-error").text
assert "Could not connect to Ollama server." in ollama_error
error_div = driver.find_element(By.ID, "ollama-error")
models_section = driver.find_element(By.ID, "ollama-models-section")
assert error_div.is_displayed() or models_section.is_displayed()
Loading
Loading