diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
new file mode 100644
index 00000000..2420c97a
--- /dev/null
+++ b/.github/workflows/pytest.yml
@@ -0,0 +1,124 @@
+name: backend
+
+on:
+  pull_request:
+
+permissions:
+  contents: read
+  issues: write
+
+jobs:
+  tests:
+    name: pytest
+    runs-on: ubuntu-latest
+
+    defaults:
+      run:
+        working-directory: ./backend
+
+    services:
+      postgres:
+        image: postgres:16
+        env:
+          POSTGRES_DB: postgres
+          POSTGRES_USER: admin
+          POSTGRES_PASSWORD: password
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd "pg_isready -U admin -d postgres"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 10
+
+    env:
+      POSTGRES_MODE: local
+      POSTGRES_HOST: 127.0.0.1
+      POSTGRES_PORT: 5432
+      POSTGRES_DATABASE: postgres
+      POSTGRES_USER: admin
+      POSTGRES_PASSWORD: password
+      POSTGRES_CONNECT_TIMEOUT: 10
+      STORAGE_MODE: local
+      LOCAL_STORAGE_BASE_PATH: /tmp/can-sr-uploads
+      AZURE_OPENAI_MODE: key
+      AZURE_DOC_INT_MODE: key
+      PYTHONUNBUFFERED: 1
+      AZURE_DOC_INT_ENDPOINT: fake
+      ENTREZ_EMAIL: fake
+      ENTREZ_API_KEY: fake
+      AZURE_STORAGE_CONNECTION_STRING: fake
+      SCOPUS_API_URL: fake
+
+
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+          cache-dependency-path: |
+            backend/requirements.txt
+            backend/requirements_dev.txt
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt -r requirements_dev.txt
+
+      - name: Run pytest with coverage
+        run: |
+          mkdir -p /tmp/can-sr-uploads
+          python -m coverage run -m pytest tests/
+          python -m coverage xml -o coverage.xml
+          python -m coverage report
+
+      - name: Generate diff coverage
+        if: github.event.pull_request.base.sha != ''
+        continue-on-error: true
+        run: |
+          diff-cover coverage.xml \
+            --compare-branch=${{ github.event.pull_request.base.sha }} \
+            --markdown-report diff-cover.md \
+            --html-report diff-cover.html
+
+      - name: Build diff coverage comment body
+        if: github.event.pull_request.base.sha != ''
+        run: |
+          {
+            echo ""
+            echo "## Diff coverage"
+            if [ -f diff-cover.md ]; then
+              cat diff-cover.md
+            else
+              echo "_Diff coverage report could not be generated for this run._"
+            fi
+            echo
+            echo "Coverage XML and HTML reports are attached to this workflow run as artifacts."
+          } > diff-coverage-comment.md
+
+      - name: Publish diff coverage comment
+        if: github.event.pull_request.base.sha != ''
+        continue-on-error: true
+        uses: edumserrano/find-create-or-update-comment@v2
+        with:
+          issue-number: ${{ github.event.pull_request.number }}
+          body-includes: ''
+          comment-author: 'github-actions[bot]'
+          body-path: backend/diff-coverage-comment.md
+          edit-mode: replace
+
+      - name: Upload coverage artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-reports
+          if-no-files-found: ignore
+          path: |
+            backend/coverage.xml
+            backend/diff-cover.html
diff --git a/backend/README.md b/backend/README.md
index a07285ca..3a5cbbb2 100644
--- a/backend/README.md
+++ b/backend/README.md
@@ -519,3 +519,16 @@ The importer currently supports these common include names:
 
 If you put **rispy keys directly** in `include:` (e.g. `doi`, `authors`), the
 importer will also attempt to copy them through as-is.
+
+
+
+## Running tests
+
+From the backend/ directory,
+1. `pip install -r requirements_dev.txt`
+2. `pytest`
+
+To calculate coverage,
+1. `coverage run -m pytest`
+2. `coverage html`
+3. Open `./htmlcov/index.html` in a browser to view the report and browse around line by line
diff --git a/backend/api/__init__.py b/backend/api/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/api/auth/__init__.py b/backend/api/auth/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/api/citations/__init__.py b/backend/api/citations/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/api/core/__init__.py b/backend/api/core/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/api/database_search/__init__.py b/backend/api/database_search/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/api/extract/__init__.py b/backend/api/extract/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/api/files/__init__.py b/backend/api/files/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/api/screen/__init__.py b/backend/api/screen/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/api/services/citation_search/__init__.py b/backend/api/services/citation_search/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/api/sr/__init__.py b/backend/api/sr/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/api/utils/__init__.py b/backend/api/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
new file mode 100644
index 00000000..efcd6591
--- /dev/null
+++ b/backend/pyproject.toml
@@ -0,0 +1,12 @@
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+
+[tool.coverage.run]
+source = ["."]
+omit = [
+    "tests/*",
+    "venv/",
+]
+[tool.coverage.report]
+show_missing = true
+skip_covered = true
diff --git a/backend/requirements_dev.txt b/backend/requirements_dev.txt
new file mode 100644
index 00000000..fbe20783
--- /dev/null
+++ b/backend/requirements_dev.txt
@@ -0,0 +1,5 @@
+pytest==9.0.2
+pytest-asyncio==1.3.0
+coverage==7.6.12
+diff-cover==9.1.1
+ipython==9
diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/tests/test_smoke.py b/backend/tests/test_smoke.py
new file mode 100644
index 00000000..b765b384
--- /dev/null
+++ b/backend/tests/test_smoke.py
@@ -0,0 +1,11 @@
+import asyncio
+
+
+def test_smoke():
+    assert True
+
+
+
+async def test_my_async_code():
+    result = await asyncio.sleep(0.01, result="success")
+    assert result == "success"
\ No newline at end of file
diff --git a/backend/tests/with_db/__init__.py b/backend/tests/with_db/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/tests/with_db/conftest.py b/backend/tests/with_db/conftest.py
new file mode 100644
index 00000000..0a67528b
--- /dev/null
+++ b/backend/tests/with_db/conftest.py
@@ -0,0 +1,265 @@
+from contextlib import asynccontextmanager
+import os
+
+import psycopg2
+import psycopg2.sql
+import pytest
+
+from api.core.config import settings
+from api.services.postgres_auth import postgres_server
+from api.services.user_db import user_db_service
+from api.services.sr_db_service import srdb_service
+
+
+def _derive_test_db_name(base_name: str) -> str:
+    if base_name.endswith("_test"):
+        return f"{base_name}_session"
+    return f"{base_name}_test"
+
+
+def _connection_kwargs(database: str) -> dict:
+    profile = settings.postgres_profile()
+    kwargs = {
+        "host": profile.get("host"),
+        "dbname": database,
+        "user": profile.get("user"),
+        "password": profile.get("password"),
+        "port": int(profile.get("port") or 5432),
+        "connect_timeout": int(os.getenv("POSTGRES_CONNECT_TIMEOUT", "3")),
+    }
+    sslmode = profile.get("sslmode")
+    if sslmode:
+        kwargs["sslmode"] = sslmode
+    return kwargs
+
+
+def _admin_connection(database: str):
+    conn = psycopg2.connect(
+        **_connection_kwargs(database),
+    )
+    conn.autocommit = True
+    return conn
+
+
+def _drop_database_if_exists(admin_database: str, database_name: str) -> None:
+    conn = _admin_connection(admin_database)
+    try:
+        with conn.cursor() as cur:
+            cur.execute(
+                "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = %s",
+                (database_name,),
+            )
+            cur.execute(
+                psycopg2.sql.SQL("DROP DATABASE IF EXISTS {}").format(
+                    psycopg2.sql.Identifier(database_name)
+                )
+            )
+    finally:
+        conn.close()
+
+
+def _create_database(admin_database: str, database_name: str) -> None:
+    conn = _admin_connection(admin_database)
+    try:
+        with conn.cursor() as cur:
+            cur.execute(
+                psycopg2.sql.SQL("CREATE DATABASE {}").format(
+                    psycopg2.sql.Identifier(database_name)
+                )
+            )
+    finally:
+        conn.close()
+
+"""
+Note on proxies and monkeypatching:
+
+ - UserDatabaseService and other code call postgres_server.aconn(), which normally opens a brand new connection each time
+ - A per-test rollback only works if all DB work for that test goes through the same connection object
+ - The proxy makes commit() effectively a no-op and keeps all async calls on the same connection until the fixture rolls it back at teardown
+ - The sync proxy covers code paths that use postgres_server.conn directly
+
+ Without the proxy:
+
+ - each aconn() block would open/close its own connection and commit on exit
+ - the final fixture rollback would not undo those committed changes
+ - the tests would stop being isolated
+
+TODO: clean this up with a better approach:
+- refactor services to use some kind of managed connection that can be overridden in tests without monkeypatching
+- I think this is what django does?
+
+"""
+
+class _SyncConnectionProxy:
+    def __init__(self, conn):
+        self._conn = conn
+
+    @property
+    def closed(self):
+        return self._conn.closed
+
+    def cursor(self, *args, **kwargs):
+        return self._conn.cursor(*args, **kwargs)
+
+    def commit(self):
+        return None
+
+    def rollback(self):
+        return self._conn.rollback()
+
+    def close(self):
+        return self._conn.close()
+
+    def get_transaction_status(self):
+        return self._conn.get_transaction_status()
+
+    def __getattr__(self, name):
+        return getattr(self._conn, name)
+
+
+class _AsyncCursorProxy:
+    def __init__(self, cursor):
+        self._cursor = cursor
+
+    class _RowAdapter:
+        """
+        hacky: some code (sr, sync) expects dict-like access to rows, other (user, async) expects tuple-like access
+
+        This accommodates both, but only in tests! In production or dev, only one will work at a time
+
+        why did we do this? See comment on proxies above,
+        it was difficult to patch single behavior without both,
+        and this was a lighter touch on existing code
+
+        """
+        def __init__(self, row, description):
+            self._row = list(row)
+            self._columns = [desc[0] for desc in description or []]
+            self._index = {name: idx for idx, name in enumerate(self._columns)}
+
+        def __getitem__(self, key):
+            if isinstance(key, str):
+                return self._row[self._index[key]]
+            return self._row[key]
+
+        def __setitem__(self, key, value):
+            if isinstance(key, str):
+                self._row[self._index[key]] = value
+            else:
+                self._row[key] = value
+
+        def get(self, key, default=None):
+            try:
+                return self[key]
+            except (KeyError, IndexError, TypeError):
+                return default
+
+        def keys(self):
+            return list(self._columns)
+
+        def items(self):
+            return [(name, self[name]) for name in self._columns]
+
+        def values(self):
+            return [self[name] for name in self._columns]
+
+        def __contains__(self, key):
+            return key in self._index
+
+        def __len__(self):
+            return len(self._row)
+
+    async def fetchone(self):
+        row = self._cursor.fetchone()
+        return self._RowAdapter(row, self._cursor.description) if row is not None else None
+
+    async def fetchall(self):
+        return [self._RowAdapter(row, self._cursor.description) for row in self._cursor.fetchall()]
+
+    @property
+    def rowcount(self):
+        return self._cursor.rowcount
+
+
+class _AsyncConnectionProxy:
+    def __init__(self, conn):
+        self._conn = conn
+
+    async def execute(self, query, params=None):
+        cursor = self._conn.cursor()
+        cursor.execute(query, params)
+        return _AsyncCursorProxy(cursor)
+
+    async def commit(self):
+        return None
+
+    async def rollback(self):
+        return self._conn.rollback()
+
+    async def close(self):
+        return self._conn.close()
+
+    def get_transaction_status(self):
+        return self._conn.get_transaction_status()
+
+    @property
+    def closed(self):
+        return self._conn.closed
+
+
+@pytest.fixture(scope="session", autouse=True)
+def test_postgres_database():
+    original_database = settings.POSTGRES_DATABASE
+    if not original_database:
+        raise RuntimeError("POSTGRES_DATABASE must be set for with_db tests")
+
+    test_database = _derive_test_db_name(original_database)
+
+    _drop_database_if_exists(original_database, test_database)
+    _create_database(original_database, test_database)
+
+    settings.POSTGRES_DATABASE = test_database
+    postgres_server.close()
+
+    try:
+        yield test_database
+    finally:
+        postgres_server.close()
+        settings.POSTGRES_DATABASE = original_database
+        _drop_database_if_exists(original_database, test_database)
+
+
+@pytest.fixture(scope="session", autouse=True)
+async def seed_database(test_postgres_database):
+    """
+    Rather than touch 'test_postgres_database', add business logic here to create tables and seed any necessary data
+    """
+    srdb_service.ensure_table_exists()
+
+    await user_db_service.ensure_table_exists()
+
+
+
+
+@pytest.fixture(autouse=True)
+async def db_transaction(test_postgres_database, monkeypatch):
+    sync_conn = psycopg2.connect(
+        **_connection_kwargs(test_postgres_database),
+    )
+    sync_proxy = _SyncConnectionProxy(sync_conn)
+    async_proxy = _AsyncConnectionProxy(sync_proxy)
+
+    @asynccontextmanager
+    async def _patched_aconn():
+        yield async_proxy
+
+    monkeypatch.setattr(postgres_server, "_conn", sync_proxy, raising=False)
+    monkeypatch.setattr(postgres_server, "aconn", _patched_aconn, raising=False)
+
+    try:
+        yield
+    finally:
+        try:
+            sync_conn.rollback()
+        finally:
+            sync_conn.close()
diff --git a/backend/tests/with_db/test_meta_db_tests.py b/backend/tests/with_db/test_meta_db_tests.py
new file mode 100644
index 00000000..cbc23666
--- /dev/null
+++ b/backend/tests/with_db/test_meta_db_tests.py
@@ -0,0 +1,91 @@
+from api.services.user_db import UserDatabaseService, user_db_service
+from api.models.auth import UserCreate
+from api.services.sr_db_service import SRDBService, srdb_service
+
+
+async def test_user_table_clean():
+    users = await user_db_service.get_all_users()
+    assert len(users) == 0
+
+async def test_create_user():
+    u = await user_db_service.get_user_by_email("a@b.com")
+    assert u is None
+
+    created = await user_db_service.create_user(
+        UserCreate(
+            email="a@b.com",
+            full_name="Test User",
+            password="password123",
+        )
+    )
+
+    assert created is not None
+    assert created.email == "a@b.com"
+
+
+    u = await user_db_service.get_user_by_email("a@b.com")
+    assert u is not None
+
+
+async def test_user_from_other_test_doesnt_exist():
+    # this should ensure that the database is clean before each test
+    u = await user_db_service.get_user_by_email("a@b.com")
+    assert u is None
+
+
+def test_synchronous_db_api():
+    current = srdb_service.list_systematic_reviews_for_user("a@b.com")
+    assert len(current) == 0
+
+    created = srdb_service.create_systematic_review(
+        name="Test SR",
+        description="A test systematic review",
+        criteria_str="Test inclusion/exclusion criteria",
+        criteria_obj={"inclusion": ["Test inclusion criteria"], "exclusion": ["Test exclusion criteria"]},
+        owner_id="1",
+        owner_email="a@b.com",
+    )
+def test_synchronous_db_api_reset_after_transaction():
+    current = srdb_service.list_systematic_reviews_for_user("a@b.com")
+    assert len(current) == 0
+
+
+async def test_mixing_sync_and_async_db_calls():
+    """
+    This test shows that mixing sync and async calls doesn't raise exceptions
+
+    It's probably still a bad idea
+    """
+
+    current = srdb_service.list_systematic_reviews_for_user("a@b.com")
+    assert len(current) == 0
+
+    users = await user_db_service.get_all_users()
+    assert len(users) == 0
+
+    created = await user_db_service.create_user(
+        UserCreate(
+            email="a@b.com",
+            full_name="Test User",
+            password="password123",
+        )
+    )
+
+    sr = srdb_service.create_systematic_review(
+        name="Test SR",
+        description="A test systematic review",
+        criteria_str="Test inclusion/exclusion criteria",
+        criteria_obj={"inclusion": ["Test inclusion criteria"], "exclusion": ["Test exclusion criteria"]},
+        owner_id="1",
+        owner_email="a@b.com",
+    )
+
+    user = await user_db_service.get_user_by_email("a@b.com")
+    assert user is not None
+
+
+    sr = srdb_service.get_systematic_review(sr['id'])
+    assert sr is not None
+
+
+