cdliai · fbkaragoz · May 14, 2026 · May 11, 2026 · May 11, 2026 · May 11, 2026
@@ -4,7 +4,19 @@ All notable changes to OpenCR are documented here. The format follows [Keep a Ch
 
 ## [Unreleased]
 
+### Changed
+
+- **Breaking:** OpenCR is GPU-first again. The in-process Apple Silicon / CPU
+  `MODEL_BACKEND=local` path, the `cpu` Docker Compose profile, and
+  `requirements-local.txt` (local `transformers` dependencies) were removed.
+- Default OCR model is now `deepseek-ai/DeepSeek-OCR-2`.
+- `docker compose up -d` now starts the NVIDIA/vLLM stack directly; no compose
+  profile is required.
+
+## [v1.0.0]
+
 ### Added
+
 - Apache-2.0 license (`LICENSE`).
 - English-first README with Turkish sibling at `README.tr.md`.
 - `CONTRIBUTING.md`, GitHub Actions CI workflow, project `Makefile`.
@@ -15,11 +27,13 @@ All notable changes to OpenCR are documented here. The format follows [Keep a Ch
 - Publish modal now prefills `username/run-name` and adds the `opencr` discoverability tag to dataset cards.
 
 ### Changed
+
 - **Breaking:** `docker compose up` no longer starts services without an explicit profile. Use `--profile gpu` (vLLM, NVIDIA) or `--profile cpu` (in-process transformers).
 - `INPUT_DIR` / `OUTPUT_DIR` default to `./input` / `./output` outside Docker, `/data/...` inside.
 - OpenAPI metadata now declares Apache-2.0; UI footer no longer claims "All rights reserved".
 
 ### Fixed
+
 - `.gitignore` now covers `.DS_Store`, IDE folders, lint caches, and HF caches.
 
 ---
@@ -34,6 +48,7 @@ All notable changes to OpenCR are documented here. The format follows [Keep a Ch
 6. GitHub auto-creates a release page from the tag; paste the changelog entry into it.
 
 Bump rules:
+
 - **PATCH** for bug fixes that don't change behavior.
 - **MINOR** for backwards-compatible features.
 - **MAJOR** for breaking changes (env var renames, removed endpoints, behavior shifts users have to adapt to).
@@ -19,8 +19,8 @@ expected text is one of the highest-leverage contributions.
 PaddleOCR / Marker on a Turkish corpus and post the table — even 
 informal numbers are useful.
 
-- **Model-backend ports.** MLX, llama.cpp, ONNX, or any other runtime 
-that improves throughput on a target platform.
+- **Deployment recipes.** vLLM, hosted GPU endpoints, and reproducible
+benchmark environments that improve throughput or quality.
 
 - **Translations.** README and dataset cards in additional languages.
 
@@ -33,7 +33,7 @@ make install
 make test
 ```
 
-`make run` starts a local dev server on http://localhost:39672 with the `local` model backend (no GPU needed; ~5–30 s/page on M-series Macs).
+`make run` starts a local dev server on http://localhost:39672 and points it at `MODEL_SERVER_URL` (default: `http://localhost:39671`). Start the bundled GPU stack with `docker compose up -d`, or provide another OpenAI-compatible GPU endpoint.
 
 ## Code style
 

@@ -1,34 +1,35 @@
-.PHONY: help install install-local run run-remote test lint format docker-up docker-down clean
+.PHONY: help install run run-remote test lint format docker-up docker-down clean
 
 PY ?= python3
 VENV ?= .venv
 PIP := $(VENV)/bin/pip
 PYBIN := $(VENV)/bin/python
+MODEL_SERVER_URL ?= http://localhost:39671
 
 help:
 	@echo "OpenCR developer targets:"
-	@echo "  make install        # venv + base deps + local-backend deps (Mac/CPU friendly)"
-	@echo "  make run            # start dev server on http://localhost:39672 with the local backend"
+	@echo "  make install        # venv + base/dev deps"
+	@echo "  make run            # start dev server on http://localhost:39672, using MODEL_SERVER_URL"
 	@echo "  make run-remote     # start dev server pointing at MODEL_SERVER_URL"
 	@echo "  make test           # run pytest suite"
 	@echo "  make lint           # ruff check"
 	@echo "  make format         # ruff format"
-	@echo "  make docker-up      # docker compose up (NVIDIA GPU profile)"
+	@echo "  make docker-up      # docker compose up (NVIDIA GPU stack)"
 	@echo "  make docker-down    # docker compose down"
 
 $(VENV):
 	$(PY) -m venv $(VENV)
 	$(PIP) install -U pip
 
 install: $(VENV)
-	$(PIP) install -r ocr_pipeline/requirements.txt -r requirements-local.txt
-	$(PIP) install pytest pytest-asyncio ruff
+	$(PIP) install -r ocr_pipeline/requirements.txt
+	$(PIP) install -r requirements-dev.txt
 
 run: $(VENV)
-	MODEL_BACKEND=local $(PYBIN) -m uvicorn ocr_pipeline.main:app --host 0.0.0.0 --port 39672 --reload
+	MODEL_BACKEND=remote MODEL_SERVER_URL="$(MODEL_SERVER_URL)" $(PYBIN) -m uvicorn ocr_pipeline.main:app --host 0.0.0.0 --port 39672 --reload
 
 run-remote: $(VENV)
-	MODEL_BACKEND=remote $(PYBIN) -m uvicorn ocr_pipeline.main:app --host 0.0.0.0 --port 39672 --reload
+	MODEL_BACKEND=remote MODEL_SERVER_URL="$(MODEL_SERVER_URL)" $(PYBIN) -m uvicorn ocr_pipeline.main:app --host 0.0.0.0 --port 39672 --reload
 
 test: $(VENV)
 	PYTHONPATH=. $(PYBIN) -m pytest -q
@@ -40,7 +41,7 @@ format: $(VENV)
 	$(VENV)/bin/ruff format ocr_pipeline tests scripts
 
 docker-up:
-	docker compose --profile gpu up -d
+	docker compose up -d
 
 docker-down:
 	docker compose down

@@ -10,16 +10,16 @@ For Turkish documents, see: [README.tr.md](./README.tr.md)
 
 ## Why OpenCR?
 
-- **Turkish-first accuracy.** Built around DeepSeek-OCR, it handles Turkish characters and difficult page layouts better than off-the-shelf OCR.
+- **Turkish-first accuracy.** Built around DeepSeek-OCR-2, it handles Turkish characters and difficult page layouts better than off-the-shelf OCR.
 - **Dataset factory.** Outputs are packaged directly as `pages.parquet` + `documents.parquet` with deterministic train/validation/test splits and a HuggingFace dataset card.
 - **Operator console.** A single-page web UI to monitor runs, page-by-page validate quality, retry, and publish to HuggingFace.
-- **Pluggable backends.** Production-grade NVIDIA + vLLM by default; runs in-process on Apple Silicon / CPU for development; or talk to any OpenAI-compatible model server.
+- **GPU-first backend.** Production-grade NVIDIA + vLLM by default, with an optional remote mode for any OpenAI-compatible GPU model server.
 
 ---
 
 ## Quickstart
 
-### Option 1 — Docker (NVIDIA GPU, fastest path to inference)
+### Option 1 — Docker (NVIDIA GPU, primary path)
 
 Requires Docker, an NVIDIA GPU, and the NVIDIA Container Toolkit.
 
@@ -29,27 +29,10 @@ docker compose up -d
 
 Open http://localhost:39672. Drop PDFs in `./input/`, hit **Start OCR run**.
 
-### Option 2 — Apple Silicon / CPU (in-process inference, no GPU needed)
-
-For local development, demos, and small jobs on a Mac or Linux box with no GPU.
-
-```bash
-git clone https://github.com/cdliai/opencr.git
-cd opencr
-python3 -m venv .venv && source .venv/bin/activate
-pip install -r ocr_pipeline/requirements.txt -r requirements-local.txt
-MODEL_BACKEND=local ./scripts/start.sh
-```
-
-Open http://localhost:39672. The DeepSeek-OCR model (~6 GB) downloads 
-on first request and runs in-process via `transformers` on MPS (Apple Silicon) 
-or CPU. Expect **5–30 seconds per page on M-series, much slower on CPU** — 
-fine for development, not for production batch jobs.
-
-### Option 3 — Remote model server (point at any OpenAI-compatible endpoint)
+### Option 2 — Remote model server (point at any OpenAI-compatible endpoint)
 
 If you already run vLLM somewhere, or use OpenRouter, or another endpoint 
-serving DeepSeek-OCR:
+serving DeepSeek-OCR-2:
 
 ```bash
 pip install -r ocr_pipeline/requirements.txt
@@ -64,11 +47,10 @@ Configurable via environment variables (or a `.env` file):
 
 | Variable             | Default                          | Description                                                                                       |
 | -------------------- | -------------------------------- | ------------------------------------------------------------------------------------------------- |
-| `MODEL_BACKEND`      | `vllm`                           | `vllm` (NVIDIA, OpenAI-compatible server), `local` (in-process transformers), `remote` (alias).   |
+| `MODEL_BACKEND`      | `vllm`                           | `vllm` for the bundled NVIDIA model server, or `remote` for another OpenAI-compatible endpoint.   |
 | `MODEL_SERVER_URL`   | `http://ocr-model:39671`         | Base URL for `vllm` / `remote` backends.                                                          |
-| `MODEL_NAME`         | `deepseek-ai/DeepSeek-OCR`       | Model identifier.                                                                                 |
+| `MODEL_NAME`         | `deepseek-ai/DeepSeek-OCR-2`     | Model identifier.                                                                                 |
 | `MODEL_API_KEY`      | `EMPTY`                          | API key for remote endpoints.                                                                     |
-| `LOCAL_DEVICE`       | auto                             | `auto`, `mps`, `cuda`, or `cpu` for the `local` backend.                                          |
 | `INPUT_DIR`          | `./input` (or `/data/input`)     | Where to read PDFs from.                                                                          |
 | `OUTPUT_DIR`         | `./output` (or `/data/output`)   | Where artifacts and the SQLite DB land.                                                           |
 | `HOST` / `PORT`      | `0.0.0.0` / `39672`              | Where the web console serves.                                                                     |
@@ -116,9 +98,8 @@ Published datasets are tagged `opencr` so they're discoverable via [HuggingFace'
                 ┌───────────────────────────────┐
                 │  Model backend                │
                 │  ┌─────────────────────────┐  │
-                │  │ vllm (NVIDIA, prod)     │  │
-                │  │ local (MPS/CPU, dev)    │  │
-                │  │ remote (any OpenAI URL) │  │
+                │  │ vLLM (NVIDIA, default)  │  │
+                │  │ remote (OpenAI URL)     │  │
                 │  └─────────────────────────┘  │
                 └───────────────────────────────┘
 ```
@@ -145,8 +126,8 @@ Tests live under `tests/`. UI is plain HTML + Alpine.js — no build step.
 ## Contributing
 
 Contributions are welcome — bug reports, Turkish-language 
-test fixtures, benchmarks against other OCR engines, model-backend 
-ports (MLX, llama.cpp), and documentation translations are 
+test fixtures, benchmarks against other OCR engines, deployment 
+recipes, and documentation translations are 
 especially useful. 
 
 See [CONTRIBUTING.md](./CONTRIBUTING.md).

@@ -4,19 +4,19 @@ OpenCR, özellikle Türkçe metinler, arşiv dökümanları ve karmaşık sayfa
 
 ## Neden OpenCR?
 
-- **Türkçe Odaklı Doğruluk:** DeepSeek-OCR tabanlı yapısıyla, standart OCR araçlarının zorlandığı Türkçe karakterlerde ve karmaşık sayfa düzenlerinde üstün performans sağlar.
+- **Türkçe Odaklı Doğruluk:** DeepSeek-OCR-2 tabanlı yapısıyla, standart OCR araçlarının zorlandığı Türkçe karakterlerde ve karmaşık sayfa düzenlerinde güçlü bir başlangıç noktası sağlar.
 - **Veri Seti Fabrikası:** Çıkarılan metinleri doğrudan `.parquet` formatında paketler ve tek tıkla HuggingFace'e yüklemeye hazır hale getirir.
 - **Operatör Konsolu:** İşlemleri izlemek, sayfa sayfa kontrol etmek ve hataları düzeltmek için modern bir web arayüzü sunar.
 
 ## Kurulum
 
-### Docker ile Çalıştırma (GPU Gerekir)
+### Docker ile Çalıştırma (NVIDIA GPU Gerekir)
 ```bash
-docker-compose up -d
+docker compose up -d
 ```
 
-### Lokal Geliştirme ve Web Arayüzü (Apple Silicon / CPU)
-Pipeline arayüzünü Apple bilgisayarınızda veya CPU üzerinde denemek için:
+### Harici Model Sunucusu ile Geliştirme
+Zaten çalışan OpenAI uyumlu bir vLLM / GPU endpoint'iniz varsa:
 
 1. **Klasör ve Ortam Hazırlığı:**
    ```bash
@@ -30,13 +30,12 @@ Pipeline arayüzünü Apple bilgisayarınızda veya CPU üzerinde denemek için:
    ```bash
    export INPUT_DIR="./input"
    export OUTPUT_DIR="./output"
-   export PYTHONPATH=$PYTHONPATH:.
-   python3 ocr_pipeline/main.py
+   MODEL_BACKEND=remote MODEL_SERVER_URL="https://your-endpoint" ./scripts/start.sh
    ```
    Erişim: **http://localhost:39672**
 
 ## Mimari
-- **Backend:** vLLM tabanlı DeepSeek-OCR (Ağır iş yükü).
+- **Backend:** vLLM tabanlı DeepSeek-OCR-2 (GPU-first).
 - **Frontend/API:** FastAPI & Alpine.js (Yönetim konsolu).
 
 ---

@@ -1,15 +1,11 @@
-# Two profiles ship out of the box:
-#
-#   docker compose --profile gpu up -d   # production: vLLM model server + pipeline (NVIDIA)
-#   docker compose --profile cpu up -d   # CPU/Mac: pipeline only, in-process transformers backend
-#
-# Without an explicit --profile, no services run. Always pick one.
+# GPU-first OpenCR stack: vLLM model server + pipeline.
+# Requires Docker, an NVIDIA GPU, and the NVIDIA Container Toolkit.
 
 services:
   ocr-model:
-    profiles: ["gpu"]
     build: ./ocr-model
     runtime: nvidia
+    ipc: host
     restart: unless-stopped
     environment:
       - NVIDIA_VISIBLE_DEVICES=all
@@ -39,7 +35,6 @@ services:
       start_period: 600s
 
   ocr-pipeline:
-    profiles: ["gpu"]
     build: ./ocr_pipeline
     restart: unless-stopped
     ports:
@@ -50,30 +45,12 @@ services:
     environment:
       - MODEL_BACKEND=vllm
       - MODEL_SERVER_URL=http://ocr-model:39671
+      - MODEL_NAME=deepseek-ai/DeepSeek-OCR-2
       - INPUT_DIR=/data/input
       - OUTPUT_DIR=/data/output
     depends_on:
       ocr-model:
         condition: service_healthy
 
-  ocr-pipeline-cpu:
-    profiles: ["cpu"]
-    build:
-      context: .
-      dockerfile: ocr_pipeline/Dockerfile.cpu
-    restart: unless-stopped
-    ports:
-      - "39672:39672"
-    volumes:
-      - ./input:/data/input
-      - ./output:/data/output
-      - hf-cache:/root/.cache/huggingface
-    environment:
-      - MODEL_BACKEND=local
-      - LOCAL_DEVICE=cpu
-      - INPUT_DIR=/data/input
-      - OUTPUT_DIR=/data/output
-      - HF_HOME=/root/.cache/huggingface
-
 volumes:
   hf-cache: