From c0c9205a5e895e9c023424299df40d7601d71c52 Mon Sep 17 00:00:00 2001
From: Dominic Bashford <dominic.bashford@noaa.gov>
Date: Wed, 18 Mar 2026 09:33:31 -0600
Subject: [PATCH 1/9] [389] Support ingest from postgresql with testing

---
 .github/workflows/pr.yaml                     |  16 +++
 echopop/ingest/__init__.py                    |   2 +
 echopop/ingest/biological.py                  | 114 +++++++++++++++++-
 .../tests/fixtures/fixtures_biodata_loader.py | 110 +++++++++++++++++
 echopop/tests/ingest/test_biodata_loader.py   |  43 ++++++-
 .../tests/test_data/ingest/test_bio_data.sql  |  84 +++++++++++++
 requirements-dev.txt                          |   2 +
 requirements.txt                              |   1 +
 8 files changed, 370 insertions(+), 2 deletions(-)
 create mode 100644 echopop/tests/test_data/ingest/test_bio_data.sql
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 96e84b2b..a7432578 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -18,6 +18,22 @@ jobs:
         python-version: ["3.12", "3.13"]
         os: [ubuntu-latest, windows-latest, macos-latest]
       fail-fast: false
+
+    services:
+      postgres:
+        image: postgres:16
+        env:
+          POSTGRES_USER: test_user
+          POSTGRES_PASSWORD: postgres
+          POSTGRES_DB: test
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - 5432:5432
+
     steps:
     - name: Check out repository code
       uses: actions/checkout@v6
diff --git a/echopop/ingest/__init__.py b/echopop/ingest/__init__.py
index dd9f0f40..a2358f71 100644
--- a/echopop/ingest/__init__.py
+++ b/echopop/ingest/__init__.py
@@ -4,6 +4,7 @@
     generate_composite_key,
     load_biodata_views,
     load_biological_data,
+    load_biodata_db_views,
 )
 from .mesh import load_isobath_data, load_mesh_data
 from .params import load_kriging_variogram_params
@@ -21,6 +22,7 @@
     "load_biological_data",
     "load_isobath_data",
     "load_biodata_views",
+    "load_biodata_db_views",
     "load_mesh_data",
     "load_kriging_variogram_params",
     "join_geostrata_by_latitude",
diff --git a/echopop/ingest/biological.py b/echopop/ingest/biological.py
index 72c7c51d..3a96872b 100644
--- a/echopop/ingest/biological.py
+++ b/echopop/ingest/biological.py
@@ -6,8 +6,9 @@
 import numpy as np
 import pandas as pd
 
-from ..utils import add_haul_uids
+from sqlalchemy import create_engine
 
+from ..utils import add_haul_uids
 
 def load_single_biological_sheet(
     biodata_filepath: Path,
@@ -89,6 +90,117 @@ def load_single_biological_view(
 
     return df_filtered
 
+def load_biodata_db_views(
+    db_credentials: Dict[str, str],
+    biodata_table_map: Dict[str, str],
+    column_name_map: Dict[str, str] = None,
+    subset_dict: Optional[Dict] = None,
+    biodata_label_map: Optional[Dict[str, Dict]] = None,
+    haul_uid_config: Dict[str, Any] = {},
+) -> Dict[str, pd.DataFrame] | None :
+    """
+    Load biological data from a postgres database.
+    Parameters
+    ----------
+    db_credentials : dict
+        Dictionary containing database credentials
+        (e.g., {"host": "localhost", "port": "5432", "dbname": "fisheries", "schema": "biodata",
+        "user": "<USERNAME>", "password": "<PASSWORD>"})
+    biodata_table_map : dict
+        Dictionary mapping dataset names to database table names
+        (e.g., {"specimen": "biodata_specimen", "length": "biodata_length", "catch": "biodata_catch"})
+    column_name_map : dict, optional
+        Dictionary mapping original column names to new column names
+        (e.g., {"frequency": "length_count", "haul": "haul_num"})
+    subset_dict : dict, optional
+        Subset dictionary containing ships and species_code for filtering
+        Format: {"ships": {ship_id: {"survey": survey_id, "haul_offset": offset}}, "species_code":
+        [codes]}
+    biodata_label_map : dict, optional
+        Dictionary mapping column names to value replacement dictionaries
+        (e.g., {"sex": {1: "male", 2: "female", 3: "unsexed"}})
+    haul_uid_config : Dict[str, Any]
+        Optional keyword arguments to override defaults or DataFrame values:
+
+        - ship_id (dict): Region-specific IDs, e.g., ``{'US': 10, 'CAN': 20}``.
+
+        - survey_id (dict): Region-specific IDs, e.g., ``{'US': 1, 'CAN': 2}``.
+
+        - species_id (int/str): A global species code override.
+
+        - haul_offset (int/float): A value subtracted from ``'haul_num'`` for records identified as
+          'CAN' (where ``haul_num - offset >= 0``).
+
+    Returns
+    -------
+    dict
+        Dictionary containing processed biological DataFrames keyed by dataset name
+    Examples
+    --------
+    >>> subset = {"ships": {160: {"survey": 201906}}, "species_code": [22500]}
+    >>> col_map = {"frequency": "length_count", "haul": "haul_num"}
+    >>> label_map = {"sex": {1: "male", 2: "female", 3: "unsexed"}}
+    """
+
+    try:
+        db_url = (f"postgresql+psycopg://"
+                  f"{db_credentials['user']}:{db_credentials['password']}@"
+                  f"{db_credentials['host']}:{db_credentials['port']}/"
+                  f"{db_credentials['dbname']}")
+
+        engine = create_engine(db_url)
+
+        biodata_dict = {}
+
+        with engine.connect() as connection:
+            for data_set, table in biodata_table_map.items():
+                query = f"SELECT * FROM {db_credentials['schema']}.{table};"
+
+                df_initial = pd.read_sql_query(query, connection)
+
+                # Force the column names to be lower case
+                df_initial.columns = df_initial.columns.str.lower()
+
+                # Rename the columns
+                if column_name_map:
+                    df_initial.rename(columns=column_name_map, inplace=True)
+
+                # Coerce ship and survey to numeric types before filtering
+                df_initial["ship"] = pd.to_numeric(df_initial["ship"])
+                df_initial["survey"] = pd.to_numeric(df_initial["survey"])
+
+                biodata_dict[data_set] = apply_ship_survey_filters(df_initial, subset_dict)
+
+        # Apply label mappings if provided
+        if biodata_label_map:
+            for col, mapping in biodata_label_map.items():
+                for name, df in biodata_dict.items():
+                    if isinstance(df, pd.DataFrame) and col in df.columns:
+                        df[col] = df[col].map(mapping).fillna(df[col])
+
+        # Coerce specimen length and weight to numeric types
+        biodata_dict["specimen"]["length"] = pd.to_numeric(biodata_dict["specimen"]["length"])
+        biodata_dict["specimen"]["weight"] = pd.to_numeric(biodata_dict["specimen"]["weight"])
+
+        # Reformat haul datatype
+        biodata_dict = {
+            k: v.assign(haul_num=v["haul_num"].astype(float)) for k, v in biodata_dict.items()
+        }
+
+        # Add UID labels
+        _ = {
+            k: add_haul_uids(v, _dataset_type=f"biodata.{k}", **haul_uid_config)
+            for k, v in biodata_dict.items()
+        }
+
+        return biodata_dict
+
+    except Exception as e:
+        print(f"Database error: {e}")
+
+    finally:
+        if 'engine' in locals():
+            engine.dispose()
 
 def load_biodata_views(
     biodata_filepaths: Dict[str, Path],
diff --git a/echopop/tests/fixtures/fixtures_biodata_loader.py b/echopop/tests/fixtures/fixtures_biodata_loader.py
index e2e32d84..0f2e9575 100644
--- a/echopop/tests/fixtures/fixtures_biodata_loader.py
+++ b/echopop/tests/fixtures/fixtures_biodata_loader.py
@@ -1,5 +1,90 @@
+import os
+from pathlib import Path
+
 import pandas as pd
 import pytest
+from sqlalchemy import create_engine, text
+
+HERE = Path(__file__).parent.absolute()
+TEST_DATA_ROOT = HERE.parent / "test_data"
+TEST_SQL_FILE = TEST_DATA_ROOT / "ingest" / "test_bio_data.sql"
+
+@pytest.fixture(scope="session")
+def postgres_container():
+    """
+    Session-scoped fixture to get database connection.
+
+    - In GitHub Actions: Uses the postgres service from the workflow
+    - Locally: Uses Testcontainers if Docker is available, skips if not
+    """
+    is_github_action = os.environ.get("GITHUB_ACTIONS")
+
+    if is_github_action:
+        # In GitHub Actions use the postgres service from workflow
+        yield type('obj', (object,), {
+            'get_connection_url': lambda: "postgresql+psycopg://test_user:postgres@localhost:5432/test",
+            'get_container_host_ip': lambda: "localhost",
+            'get_exposed_port': lambda x: 5432
+        })()
+    else:
+        # Local development
+        try:
+            from testcontainers.postgres import PostgresContainer
+
+            container = PostgresContainer(
+                image="postgres:16",
+                username="test_user",
+                password="postgres",
+                dbname="test"
+            )
+            container.start()
+            yield container
+            container.stop()
+        except Exception as e:
+            # Docker not available - skip integration tests
+            pytest.skip(f"Docker must be running for Testcontainers: {e}")
+
+
+@pytest.fixture(scope="session")
+def database_credentials(postgres_container):
+    """
+    Session-scoped fixture to:
+    1. Connect to the PostgreSQL database (CI or local).
+    2. Load 'test_bio_data.sql' into it.
+    3. Yield the credentials dictionary in the format expected by load_biodata_db_views.
+
+    Returns dict with keys: host, port, dbname, user, password, schema
+    """
+
+    host = postgres_container.get_container_host_ip()
+    port = postgres_container.get_exposed_port(5432)
+
+    creds = {
+        "host": host,
+        "port": port,
+        "dbname": "test",
+        "user": "test_user",
+        "password": "postgres",
+        "schema": "public",
+    }
+
+    db_url = (
+        f"postgresql+psycopg://"
+        f"{creds['user']}:{creds['password']}@"
+        f"{creds['host']}:{creds['port']}/"
+        f"{creds['dbname']}"
+    )
+
+    try:
+        engine = create_engine(db_url)
+        with engine.begin() as connection:
+            with open(TEST_SQL_FILE, "r") as f:
+                sql_script = f.read()
+                connection.execute(text(sql_script))
+    except Exception as e:
+        pytest.fail(f"Failed to load {TEST_SQL_FILE}: {e}")
+
+    yield creds
 
 
 @pytest.fixture
@@ -77,6 +162,31 @@ def subset_dict():
     }
 
 
+@pytest.fixture
+def pg_subset_dict():
+    """Create subset dictionary for filtering biological data."""
+    return {
+        "ships": {101: {"survey": 2024}},
+        "species_code": [22500],
+    }
+
+@pytest.fixture
+def bio_data_table_map():
+    """Create table mapping for biological data in the database."""
+    return {
+        "catch": "echopop_catch",
+        "specimen": "echopop_fish"
+    }
+
+@pytest.fixture
+def column_name_map():
+    """Create column mapping for biological data loaded from the database."""
+    return {
+        "haul": "haul_num",
+        "weight_in_haul": "haul_weight",
+        "species_id": "species_code",
+    }
+
 @pytest.fixture
 def label_map():
     """Create label mapping dictionary for biological data."""
diff --git a/echopop/tests/ingest/test_biodata_loader.py b/echopop/tests/ingest/test_biodata_loader.py
index 2c46dc38..77909c65 100644
--- a/echopop/tests/ingest/test_biodata_loader.py
+++ b/echopop/tests/ingest/test_biodata_loader.py
@@ -3,7 +3,7 @@
 import pandas as pd
 import pytest
 
-from echopop.ingest.biological import apply_ship_survey_filters, load_biological_data
+from echopop.ingest.biological import apply_ship_survey_filters, load_biological_data, load_biodata_db_views
 
 
 def test_load_biological_data_basic(bio_excel_file, bio_sheet_map):
@@ -91,3 +91,44 @@ def test_apply_ship_survey_filters_no_subset(biological_data):
 
     assert result is not df  # Not the same object
     pd.testing.assert_frame_equal(result, df)  # But same content
+
+# Ingest from database tests
+def test_load_biological_data_basic_from_db(database_credentials, bio_data_table_map):
+    """Test basic loading of biological data without optional parameters."""
+    result = load_biodata_db_views(database_credentials, bio_data_table_map)
+
+    assert isinstance(result, dict)
+
+    for df in result.values():
+        assert isinstance(df, pd.DataFrame)
+        assert not df.empty
+
+
+def test_load_biological_data_with_column_map_from_db(database_credentials, bio_data_table_map, column_name_map):
+    """Test loading with column name mapping."""
+    result = load_biodata_db_views(database_credentials, bio_data_table_map, column_name_map=column_name_map)
+
+    if "catch" in result:
+        assert "haul_weight" in result["catch"].columns
+        assert result["catch"].loc[3, "haul_weight"] == 250.0
+        assert "haul_num" in result["catch"].columns
+        assert "weight_in_haul" not in result["catch"].columns
+
+    if "specimen" in result:
+        assert "species_code" in result["specimen"].columns
+        assert result["specimen"].loc[2, "species_code"] == 22500
+        assert "haul_num" in result["catch"].columns
+
+
+def test_load_biological_data_with_subset_from_db(database_credentials, bio_data_table_map, pg_subset_dict, column_name_map):
+    """Test loading with subset filtering."""
+    result = load_biodata_db_views(
+        database_credentials, bio_data_table_map, subset_dict=pg_subset_dict, column_name_map=column_name_map
+    )
+
+    for df in result.values():
+        if "species_code" in df.columns:
+            assert (df["species_code"] == 22500).all()
+
+        if "ship" in df.columns:
+            assert (df["ship"] == 101).all()
\ No newline at end of file
diff --git a/echopop/tests/test_data/ingest/test_bio_data.sql b/echopop/tests/test_data/ingest/test_bio_data.sql
new file mode 100644
index 00000000..cf950e30
--- /dev/null
+++ b/echopop/tests/test_data/ingest/test_bio_data.sql
@@ -0,0 +1,84 @@
+-- =================================================================
+--  Database Seed File
+--  Generated from input_files document.
+-- =================================================================
+
+-- Drop existing objects --
+
+DROP TABLE IF EXISTS echopop_catch CASCADE;
+DROP TABLE IF EXISTS echopop_fish CASCADE;
+DROP TYPE IF EXISTS sex_enum;
+
+CREATE TYPE sex_enum AS ENUM (
+    'male',
+    'female',
+    'unsexed'
+);
+
+-- Create Main Data Tables --
+
+CREATE TABLE echopop_fish (
+    ship INTEGER NOT NULL,
+    survey INTEGER NOT NULL,
+    haul_num INTEGER NOT NULL,
+    species_code INTEGER NOT NULL,
+
+    sex sex_enum NOT NULL DEFAULT 'unsexed',
+
+    -- cm
+    length DECIMAL(10, 2) CHECK (length > 0),
+
+    -- kg
+    weight DECIMAL(10, 3) CHECK (weight > 0),
+
+    -- years
+    age DECIMAL(5, 1) CHECK (age >= 0)
+);
+
+CREATE TABLE echopop_catch (
+    ship INTEGER NOT NULL,
+    survey INTEGER NOT NULL,
+    haul_num INTEGER NOT NULL,
+    species_code INTEGER NOT NULL,
+
+    -- kg
+    weight_in_haul DECIMAL(10, 3) NOT NULL CHECK (weight_in_haul >= 0),
+
+    gear VARCHAR(50),
+    net_num INTEGER,
+
+    -- Ensure only one weight entry per haul/species
+    UNIQUE(ship, survey, haul_num, species_code)
+);
+
+-- Insert Data --
+
+INSERT INTO echopop_fish (ship, survey, haul_num, species_code, sex, length, weight, age) VALUES
+(101, 2024, 1, 22500, 'male', 30.5, 0.450, 4.0),
+(101, 2024, 1, 22500, 'male', 31.0, 0.465, 4.0),
+(101, 2024, 1, 22500, 'unsexed', 20.0, 0.2, 2.0),
+(101, 2024, 1, 22500, 'female', 32.0, 0.510, 5.0),
+(101, 2024, 1, 22500, 'unsexed', 15.2, NULL, 1.0),      -- NULL weight
+(101, 2024, 1, 206, 'female', 25.0, 0.300, 3.0),
+(101, 2024, 1, 206, 'female', 26.5, 0.320, 3.0),
+(101, 2024, 2, 22500, 'male', 40.0, 0.600, 6.0),
+(101, 2024, 2, 22500, 'female', 42.5, 0.650, 7.0),
+(101, 2024, 2, 22500, 'unsexed', NULL, NULL, NULL), -- All info missing
+(102, 2024, 1, 150, 'female', 45.0, 1.200, 10.0),
+(102, 2024, 1, 150, 'male', 40.0, 0.950, 8.0),
+(101, 2024, 1, 22500, 'male', 30.5, NULL, NULL),
+(101, 2024, 1, 22500, 'male', 31.0, NULL, NULL),
+(101, 2024, 1, 22500, 'unsexed', 20.0, NULL, NULL),
+(101, 2024, 1, 22500, 'female', 32.0, NULL, NULL),
+(101, 2024, 1, 22500, 'female', 31.0, NULL, NULL),
+(101, 2025, 1, 206, 'male', 35.0, 0.500, 5.0);
+
+INSERT INTO echopop_catch (ship, survey, haul_num, species_code, weight_in_haul, gear, net_num) VALUES
+(101, 2024, 1, 22500, 120.500, 'Aleutian Wing Trawl', 5880),
+(101, 2024, 1, 206, 75.200, 'Aleutian Wing Trawl', 5880),
+(101, 2024, 1, 150, 50.000, 'Aleutian Wing Trawl', 5880),
+(101, 2024, 2, 22500, 250.000, 'Aleutian Wing Trawl', 5594),
+(101, 2024, 3, 22500, 230.000, 'Aleutian Wing Trawl', 5594),
+(102, 2024, 1, 150, 50.000, 'Aleutian Wing Trawl', 5594),
+(102, 2024, 2, 22500, 40.000, NULL, NULL),
+(101, 2025, 1, 206, 90.000, 'Aleutian Wing Trawl', NULL);
\ No newline at end of file
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 25bd2e13..ec1b822d 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -5,3 +5,5 @@ isort
 pre-commit
 pytest
 tox
+testcontainers[postgresql]
+psycopg[binary]
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 1caa5e44..5722c163 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,6 +7,7 @@ pandas
 scipy
 numba>=0.63.0b1
 xarray>=2026.01.0
+sqlalchemy
 # Spatial data processing stack
 cartopy
 geopandas

From a188a6267c8d110d9c180232a45e5ed805bd0584 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 18 Mar 2026 15:46:47 +0000
Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 echopop/ingest/__init__.py                    |  2 +-
 echopop/ingest/biological.py                  | 21 ++++++++-----
 .../tests/fixtures/fixtures_biodata_loader.py | 30 ++++++++++---------
 echopop/tests/ingest/test_biodata_loader.py   | 26 ++++++++++++----
 .../tests/test_data/ingest/test_bio_data.sql  |  2 +-
 requirements-dev.txt                          |  2 +-
 6 files changed, 52 insertions(+), 31 deletions(-)

diff --git a/echopop/ingest/__init__.py b/echopop/ingest/__init__.py
index ca814597..0fef78b8 100644
--- a/echopop/ingest/__init__.py
+++ b/echopop/ingest/__init__.py
@@ -12,9 +12,9 @@
 from .biological import (
     apply_composite_key,
     generate_composite_key,
+    load_biodata_db_views,
     load_biodata_views,
     load_biological_data,
-    load_biodata_db_views,
 )
 from .mesh import load_isobath_data, load_mesh_data
 from .params import load_kriging_variogram_params
diff --git a/echopop/ingest/biological.py b/echopop/ingest/biological.py
index 8718959c..dd3e8282 100644
--- a/echopop/ingest/biological.py
+++ b/echopop/ingest/biological.py
@@ -15,11 +15,11 @@
 
 import numpy as np
 import pandas as pd
-
 from sqlalchemy import create_engine
 
 from ..utils.base import add_haul_uids
 
+
 def load_single_biological_sheet(
     biodata_filepath: Path,
     sheet_name: str,
@@ -102,6 +102,7 @@ def load_single_biological_view(
 
     return df_filtered
 
+
 def load_biodata_db_views(
     db_credentials: Dict[str, str],
     biodata_table_map: Dict[str, str],
@@ -109,9 +110,10 @@ def load_biodata_db_views(
     subset_dict: Optional[Dict] = None,
     biodata_label_map: Optional[Dict[str, Dict]] = None,
     haul_uid_config: Dict[str, Any] = {},
-) -> Dict[str, pd.DataFrame] | None :
+) -> Dict[str, pd.DataFrame] | None:
     """
     Load biological data from a postgres database.
+
     Parameters
     ----------
     db_credentials : dict
@@ -147,18 +149,20 @@ def load_biodata_db_views(
     -------
     dict
         Dictionary containing processed biological DataFrames keyed by dataset name
+
     Examples
     --------
     >>> subset = {"ships": {160: {"survey": 201906}}, "species_code": [22500]}
     >>> col_map = {"frequency": "length_count", "haul": "haul_num"}
     >>> label_map = {"sex": {1: "male", 2: "female", 3: "unsexed"}}
     """
-
     try:
-        db_url = (f"postgresql+psycopg://"
-                  f"{db_credentials['user']}:{db_credentials['password']}@"
-                  f"{db_credentials['host']}:{db_credentials['port']}/"
-                  f"{db_credentials['dbname']}")
+        db_url = (
+            f"postgresql+psycopg://"
+            f"{db_credentials['user']}:{db_credentials['password']}@"
+            f"{db_credentials['host']}:{db_credentials['port']}/"
+            f"{db_credentials['dbname']}"
+        )
 
         engine = create_engine(db_url)
 
@@ -211,9 +215,10 @@ def load_biodata_db_views(
         print(f"Database error: {e}")
 
     finally:
-        if 'engine' in locals():
+        if "engine" in locals():
             engine.dispose()
 
+
 def load_biodata_views(
     biodata_filepaths: dict[str, Path],
     column_name_map: dict[str, str] = None,
diff --git a/echopop/tests/fixtures/fixtures_biodata_loader.py b/echopop/tests/fixtures/fixtures_biodata_loader.py
index 0f2e9575..4c9c709f 100644
--- a/echopop/tests/fixtures/fixtures_biodata_loader.py
+++ b/echopop/tests/fixtures/fixtures_biodata_loader.py
@@ -9,6 +9,7 @@
 TEST_DATA_ROOT = HERE.parent / "test_data"
 TEST_SQL_FILE = TEST_DATA_ROOT / "ingest" / "test_bio_data.sql"
 
+
 @pytest.fixture(scope="session")
 def postgres_container():
     """
@@ -21,21 +22,22 @@ def postgres_container():
 
     if is_github_action:
         # In GitHub Actions use the postgres service from workflow
-        yield type('obj', (object,), {
-            'get_connection_url': lambda: "postgresql+psycopg://test_user:postgres@localhost:5432/test",
-            'get_container_host_ip': lambda: "localhost",
-            'get_exposed_port': lambda x: 5432
-        })()
+        yield type(
+            "obj",
+            (object,),
+            {
+                "get_connection_url": lambda: "postgresql+psycopg://test_user:postgres@localhost:5432/test",
+                "get_container_host_ip": lambda: "localhost",
+                "get_exposed_port": lambda x: 5432,
+            },
+        )()
     else:
         # Local development
         try:
             from testcontainers.postgres import PostgresContainer
 
             container = PostgresContainer(
-                image="postgres:16",
-                username="test_user",
-                password="postgres",
-                dbname="test"
+                image="postgres:16", username="test_user", password="postgres", dbname="test"
             )
             container.start()
             yield container
@@ -78,7 +80,7 @@ def database_credentials(postgres_container):
     try:
         engine = create_engine(db_url)
         with engine.begin() as connection:
-            with open(TEST_SQL_FILE, "r") as f:
+            with open(TEST_SQL_FILE) as f:
                 sql_script = f.read()
                 connection.execute(text(sql_script))
     except Exception as e:
@@ -170,13 +172,12 @@ def pg_subset_dict():
         "species_code": [22500],
     }
 
+
 @pytest.fixture
 def bio_data_table_map():
     """Create table mapping for biological data in the database."""
-    return {
-        "catch": "echopop_catch",
-        "specimen": "echopop_fish"
-    }
+    return {"catch": "echopop_catch", "specimen": "echopop_fish"}
+
 
 @pytest.fixture
 def column_name_map():
@@ -187,6 +188,7 @@ def column_name_map():
         "species_id": "species_code",
     }
 
+
 @pytest.fixture
 def label_map():
     """Create label mapping dictionary for biological data."""
diff --git a/echopop/tests/ingest/test_biodata_loader.py b/echopop/tests/ingest/test_biodata_loader.py
index ebeba543..a90086af 100644
--- a/echopop/tests/ingest/test_biodata_loader.py
+++ b/echopop/tests/ingest/test_biodata_loader.py
@@ -3,7 +3,11 @@
 import pandas as pd
 import pytest
 
-from echopop.ingest.biological import apply_ship_survey_filters, load_biological_data, load_biodata_db_views
+from echopop.ingest.biological import (
+    apply_ship_survey_filters,
+    load_biodata_db_views,
+    load_biological_data,
+)
 
 
 def test_load_biological_data_basic(bio_excel_file, bio_sheet_map):
@@ -95,6 +99,7 @@ def test_apply_ship_survey_filters_no_subset(biological_data):
     assert result is not df  # Not the same object
     pd.testing.assert_frame_equal(result, df)  # But same content
 
+
 # Ingest from database tests
 def test_load_biological_data_basic_from_db(database_credentials, bio_data_table_map):
     """Test basic loading of biological data without optional parameters."""
@@ -107,9 +112,13 @@ def test_load_biological_data_basic_from_db(database_credentials, bio_data_table
         assert not df.empty
 
 
-def test_load_biological_data_with_column_map_from_db(database_credentials, bio_data_table_map, column_name_map):
+def test_load_biological_data_with_column_map_from_db(
+    database_credentials, bio_data_table_map, column_name_map
+):
     """Test loading with column name mapping."""
-    result = load_biodata_db_views(database_credentials, bio_data_table_map, column_name_map=column_name_map)
+    result = load_biodata_db_views(
+        database_credentials, bio_data_table_map, column_name_map=column_name_map
+    )
 
     if "catch" in result:
         assert "haul_weight" in result["catch"].columns
@@ -123,10 +132,15 @@ def test_load_biological_data_with_column_map_from_db(database_credentials, bio_
         assert "haul_num" in result["catch"].columns
 
 
-def test_load_biological_data_with_subset_from_db(database_credentials, bio_data_table_map, pg_subset_dict, column_name_map):
+def test_load_biological_data_with_subset_from_db(
+    database_credentials, bio_data_table_map, pg_subset_dict, column_name_map
+):
     """Test loading with subset filtering."""
     result = load_biodata_db_views(
-        database_credentials, bio_data_table_map, subset_dict=pg_subset_dict, column_name_map=column_name_map
+        database_credentials,
+        bio_data_table_map,
+        subset_dict=pg_subset_dict,
+        column_name_map=column_name_map,
     )
 
     for df in result.values():
@@ -134,4 +148,4 @@ def test_load_biological_data_with_subset_from_db(database_credentials, bio_data
             assert (df["species_code"] == 22500).all()
 
         if "ship" in df.columns:
-            assert (df["ship"] == 101).all()
\ No newline at end of file
+            assert (df["ship"] == 101).all()
diff --git a/echopop/tests/test_data/ingest/test_bio_data.sql b/echopop/tests/test_data/ingest/test_bio_data.sql
index cf950e30..26f728ad 100644
--- a/echopop/tests/test_data/ingest/test_bio_data.sql
+++ b/echopop/tests/test_data/ingest/test_bio_data.sql
@@ -81,4 +81,4 @@ INSERT INTO echopop_catch (ship, survey, haul_num, species_code, weight_in_haul,
 (101, 2024, 3, 22500, 230.000, 'Aleutian Wing Trawl', 5594),
 (102, 2024, 1, 150, 50.000, 'Aleutian Wing Trawl', 5594),
 (102, 2024, 2, 22500, 40.000, NULL, NULL),
-(101, 2025, 1, 206, 90.000, 'Aleutian Wing Trawl', NULL);
\ No newline at end of file
+(101, 2025, 1, 206, 90.000, 'Aleutian Wing Trawl', NULL);
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 9c498dea..cfc8649b 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -7,4 +7,4 @@ pytest
 ruff
 tox
 testcontainers[postgresql]
-psycopg[binary]
\ No newline at end of file
+psycopg[binary]

From c0ccfd2d8f9051840d6ef3f3d015527173cee355 Mon Sep 17 00:00:00 2001
From: Dominic Bashford <dominic.bashford@noaa.gov>
Date: Wed, 18 Mar 2026 09:57:06 -0600
Subject: [PATCH 3/9] [389] Add sqlalchemy to env

---
 environment.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/environment.yaml b/environment.yaml
index 9332cd30..11044274 100644
--- a/environment.yaml
+++ b/environment.yaml
@@ -12,6 +12,7 @@ dependencies:
   - pandas
   - scipy
   - xarray>=2026.01.0
+  - sqlalchemy
   # Spatial stack
   - geopandas
   - geopy

From e06dd2503054ce1a0c685786e29e1939a41b6a1e Mon Sep 17 00:00:00 2001
From: Dominic Bashford <dominic.bashford@noaa.gov>
Date: Wed, 18 Mar 2026 10:04:15 -0600
Subject: [PATCH 4/9] [389] Add sqlalchemy to pyproject and cross platform
 postgres to pr action

---
 .github/workflows/pr.yaml | 7 +++++++
 pyproject.toml            | 1 +
 2 files changed, 8 insertions(+)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index a7432578..ed2a2522 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -41,6 +41,13 @@ jobs:
       uses: actions/setup-python@v6
       with:
         python-version: ${{ matrix.python-version }}
+    - name: Set up cross-platform PostgreSQL
+      uses: ikalnytskyi/action-setup-postgres@v7
+      with:
+        username: test_user
+        password: postgres
+        database: test
+        port: 5432
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
diff --git a/pyproject.toml b/pyproject.toml
index 4562ebe5..57718233 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,6 +38,7 @@ dependencies = [
     "scipy",
     "numba>=0.63.0b1",
     "xarray>=2026.01.0",
+    "sqlalchemy",
     # Spatial data processing stack
     "cartopy",
     "geopandas",

From ce9658b6f5426c01f8858b0825458c0e5dbab147 Mon Sep 17 00:00:00 2001
From: Dominic Bashford <dominic.bashford@noaa.gov>
Date: Wed, 18 Mar 2026 10:08:29 -0600
Subject: [PATCH 5/9] [389] Windows and Mac postgres support in pr action

---
 .github/workflows/pr.yaml | 22 +++++-----------------
 environment.yaml          |  1 -
 2 files changed, 5 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index ed2a2522..a63f363d 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -19,21 +19,6 @@ jobs:
         os: [ubuntu-latest, windows-latest, macos-latest]
       fail-fast: false
 
-    services:
-      postgres:
-        image: postgres:16
-        env:
-          POSTGRES_USER: test_user
-          POSTGRES_PASSWORD: postgres
-          POSTGRES_DB: test
-        options: >-
-          --health-cmd pg_isready
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
-        ports:
-          - 5432:5432
-
     steps:
     - name: Check out repository code
       uses: actions/checkout@v6
@@ -41,13 +26,16 @@ jobs:
       uses: actions/setup-python@v6
       with:
         python-version: ${{ matrix.python-version }}
-    - name: Set up cross-platform PostgreSQL
-      uses: ikalnytskyi/action-setup-postgres@v7
+    - name: Setup PostgreSQL Binaries
+      # GitHub action to set up PostgreSQL for all 3 platforms
+      uses: ikalnytskyi/action-setup-postgres@v8
       with:
         username: test_user
         password: postgres
         database: test
         port: 5432
+        postgres-version: '14'
+      id: postgres
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
diff --git a/environment.yaml b/environment.yaml
index 11044274..9332cd30 100644
--- a/environment.yaml
+++ b/environment.yaml
@@ -12,7 +12,6 @@ dependencies:
   - pandas
   - scipy
   - xarray>=2026.01.0
-  - sqlalchemy
   # Spatial stack
   - geopandas
   - geopy

From 0f8fb0b4636ce44437cbb31b492a150d19bc1501 Mon Sep 17 00:00:00 2001
From: Dominic Bashford <dominic.bashford@noaa.gov>
Date: Wed, 18 Mar 2026 10:12:56 -0600
Subject: [PATCH 6/9] [389] Fix function type hints

---
 echopop/ingest/biological.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/echopop/ingest/biological.py b/echopop/ingest/biological.py
index dd3e8282..a6fcb41a 100644
--- a/echopop/ingest/biological.py
+++ b/echopop/ingest/biological.py
@@ -104,13 +104,13 @@ def load_single_biological_view(
 
 
 def load_biodata_db_views(
-    db_credentials: Dict[str, str],
-    biodata_table_map: Dict[str, str],
-    column_name_map: Dict[str, str] = None,
-    subset_dict: Optional[Dict] = None,
-    biodata_label_map: Optional[Dict[str, Dict]] = None,
-    haul_uid_config: Dict[str, Any] = {},
-) -> Dict[str, pd.DataFrame] | None:
+    db_credentials: dict[str, str],
+    biodata_table_map: dict[str, str],
+    column_name_map: dict[str, str] = None,
+    subset_dict: dict | None = None,
+    biodata_label_map: dict[str, dict] | None = None,
+    haul_uid_config: dict[str, Any] = {},
+) -> dict[str, pd.DataFrame] | None:
     """
     Load biological data from a postgres database.
 

From fb351295d43d18bc9b6d57785b69579e80e85843 Mon Sep 17 00:00:00 2001
From: Dominic Bashford <dominic.bashford@noaa.gov>
Date: Wed, 18 Mar 2026 10:18:41 -0600
Subject: [PATCH 7/9] [389] Fix for action

---
 echopop/ingest/biological.py                      | 7 ++++---
 echopop/tests/fixtures/fixtures_biodata_loader.py | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/echopop/ingest/biological.py b/echopop/ingest/biological.py
index a6fcb41a..28d616a7 100644
--- a/echopop/ingest/biological.py
+++ b/echopop/ingest/biological.py
@@ -109,7 +109,7 @@ def load_biodata_db_views(
     column_name_map: dict[str, str] = None,
     subset_dict: dict | None = None,
     biodata_label_map: dict[str, dict] | None = None,
-    haul_uid_config: dict[str, Any] = {},
+    haul_uid_config: dict[str, Any] = None,
 ) -> dict[str, pd.DataFrame] | None:
     """
     Load biological data from a postgres database.
@@ -122,7 +122,8 @@ def load_biodata_db_views(
         "user": "<USERNAME>", "password": "<PASSWORD>"})
     biodata_table_map : dict
         Dictionary mapping dataset names to database table names
-        (e.g., {"specimen": "biodata_specimen", "length": "biodata_length", "catch": "biodata_catch"})
+        (e.g., {"specimen": "biodata_specimen", "length": "biodata_length",
+        "catch": "biodata_catch"})
     column_name_map : dict, optional
         Dictionary mapping original column names to new column names
         (e.g., {"frequency": "length_count", "haul": "haul_num"})
@@ -190,7 +191,7 @@ def load_biodata_db_views(
         # Apply label mappings if provided
         if biodata_label_map:
             for col, mapping in biodata_label_map.items():
-                for name, df in biodata_dict.items():
+                for _name, df in biodata_dict.items():
                     if isinstance(df, pd.DataFrame) and col in df.columns:
                         df[col] = df[col].map(mapping).fillna(df[col])
 
diff --git a/echopop/tests/fixtures/fixtures_biodata_loader.py b/echopop/tests/fixtures/fixtures_biodata_loader.py
index 4c9c709f..077da27d 100644
--- a/echopop/tests/fixtures/fixtures_biodata_loader.py
+++ b/echopop/tests/fixtures/fixtures_biodata_loader.py
@@ -26,9 +26,10 @@ def postgres_container():
             "obj",
             (object,),
             {
-                "get_connection_url": lambda: "postgresql+psycopg://test_user:postgres@localhost:5432/test",
-                "get_container_host_ip": lambda: "localhost",
-                "get_exposed_port": lambda x: 5432,
+                "get_connection_url": lambda
+                    self: "postgresql+psycopg://test_user:postgres@localhost:5432/test",
+                "get_container_host_ip": lambda self: "localhost",
+                "get_exposed_port": lambda self, port: 5432,
             },
         )()
     else:

From 8fdd6738dbf895dd73816062742f0c0f594e5e18 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 18 Mar 2026 16:19:38 +0000
Subject: [PATCH 8/9] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 echopop/tests/fixtures/fixtures_biodata_loader.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/echopop/tests/fixtures/fixtures_biodata_loader.py b/echopop/tests/fixtures/fixtures_biodata_loader.py
index 077da27d..0334c9b2 100644
--- a/echopop/tests/fixtures/fixtures_biodata_loader.py
+++ b/echopop/tests/fixtures/fixtures_biodata_loader.py
@@ -26,8 +26,7 @@ def postgres_container():
             "obj",
             (object,),
             {
-                "get_connection_url": lambda
-                    self: "postgresql+psycopg://test_user:postgres@localhost:5432/test",
+                "get_connection_url": lambda self: "postgresql+psycopg://test_user:postgres@localhost:5432/test",
                 "get_container_host_ip": lambda self: "localhost",
                 "get_exposed_port": lambda self, port: 5432,
             },

From 3b6d3dfabed7a41c368bb8f465bed6147ff36fd3 Mon Sep 17 00:00:00 2001
From: Dominic Bashford <dominic.bashford@noaa.gov>
Date: Wed, 18 Mar 2026 10:35:54 -0600
Subject: [PATCH 9/9] [389] Fix haul uid builder

---
 echopop/ingest/biological.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/echopop/ingest/biological.py b/echopop/ingest/biological.py
index 28d616a7..7b42a29b 100644
--- a/echopop/ingest/biological.py
+++ b/echopop/ingest/biological.py
@@ -206,7 +206,7 @@ def load_biodata_db_views(
 
         # Add UID labels
         _ = {
-            k: add_haul_uids(v, _dataset_type=f"biodata.{k}", **haul_uid_config)
+            k: add_haul_uids(v, _dataset_type=f"biodata.{k}", **(haul_uid_config or {}))
             for k, v in biodata_dict.items()
         }