diff --git a/biolearn/clinical/__init__.py b/biolearn/clinical/__init__.py
new file mode 100644
index 0000000..380b060
--- /dev/null
+++ b/biolearn/clinical/__init__.py
@@ -0,0 +1,4 @@
+from biolearn.clinical.registry import BIOMARKER_REGISTRY
+from biolearn.clinical.convert import convert_units
+
+__all__ = ["BIOMARKER_REGISTRY", "convert_units"]
diff --git a/biolearn/clinical/convert.py b/biolearn/clinical/convert.py
new file mode 100644
index 0000000..57729dd
--- /dev/null
+++ b/biolearn/clinical/convert.py
@@ -0,0 +1,99 @@
+"""Unit conversion utilities for clinical biomarker data."""
+
+import warnings
+import pandas as pd
+from biolearn.clinical.registry import BIOMARKER_REGISTRY
+
+
+def convert_units(df, source_units=None, units=None):
+    """Convert biomarker columns to canonical units.
+
+    Parameters
+    ----------
+    df : DataFrame
+        DataFrame with biomarker columns (samples as rows).
+    source_units : str, optional
+        Named source preset (e.g. 'ukbiobank'). Applies preset unit
+        mappings for all biomarkers from that source.
+    units : dict, optional
+        Per-biomarker unit overrides. Keys are biomarker names, values
+        are source unit strings (e.g. ``{"creatinine": "umol/L"}``).
+        Overrides any preset from ``source_units``.
+
+    Returns
+    -------
+    DataFrame
+        Copy of df with converted columns.
+
+    Raises
+    ------
+    ValueError
+        If the preset is unknown or a unit has no known conversion.
+    """
+    unit_map = {}
+    if source_units is not None:
+        unit_map.update(BIOMARKER_REGISTRY.get_source_preset(source_units))
+    if units is not None:
+        unit_map.update(units)
+
+    # Always work on a copy so the caller's frame is never mutated.
+    result = df.copy()
+    for biomarker, source_unit in unit_map.items():
+        if biomarker not in result.columns:
+            continue  # mapping entry for a column this frame does not have
+        if biomarker not in BIOMARKER_REGISTRY:
+            warnings.warn(
+                f"Biomarker '{biomarker}' not in registry, skipping conversion.",
+                stacklevel=2,
+            )
+            continue
+
+        entry = BIOMARKER_REGISTRY.get(biomarker)
+        if source_unit == entry["unit"]:
+            continue  # already in canonical units
+
+        converter = entry["conversions"].get(source_unit)
+        if converter is None:
+            raise ValueError(
+                f"No conversion from '{source_unit}' to '{entry['unit']}' "
+                f"for biomarker '{biomarker}'. "
+                f"Known source units: {list(entry['conversions'].keys())}"
+            )
+
+        # Registry converters are plain arithmetic: convert the column at once.
+        result[biomarker] = converter(result[biomarker])
+
+    return result
+
+
+def validate_ranges(df, warn=True):
+    """Count values lying outside each biomarker's expected range.
+
+    Parameters
+    ----------
+    df : DataFrame
+        Samples-as-rows frame; columns absent from the registry are skipped.
+    warn : bool
+        If True, emit one warning per biomarker with out-of-range values.
+
+    Returns
+    -------
+    dict
+        Biomarker name -> number of out-of-range values (offenders only).
+    """
+    flagged = {}
+    for name in (c for c in df.columns if c in BIOMARKER_REGISTRY):
+        low, high = BIOMARKER_REGISTRY.valid_range(name)
+        values = df[name]
+        n_bad = ((values < low) | (values > high)).sum()
+        if n_bad <= 0:
+            continue
+        flagged[name] = int(n_bad)
+        if not warn:
+            continue
+        unit = BIOMARKER_REGISTRY.canonical_unit(name)
+        warnings.warn(
+            f"Biomarker '{name}': {n_bad} values outside expected "
+            f"range [{low}, {high}] (unit: {unit}). Check units."
+        )
+    return flagged
diff --git a/biolearn/clinical/registry.py b/biolearn/clinical/registry.py
new file mode 100644
index 0000000..1172d74
--- /dev/null
+++ b/biolearn/clinical/registry.py
@@ -0,0 +1,206 @@
+"""Biomarker registry defining canonical names, units, valid ranges, and conversions.
+
+The canonical units match NHANES conventions established in biolearn.load.
+All clinical clocks expect data in these units.
+"""
+
+_REGISTRY = {  # canonical name -> unit, range, description, conversions
+    "albumin": {
+        "unit": "g/dL",
+        "range": (1.0, 6.0),  # (min, max) expressed in "unit"
+        "description": "Serum albumin",
+        "conversions": {
+            "g/L": lambda x: x / 10.0,  # g/L -> g/dL
+        },
+    },
+    "creatinine": {
+        "unit": "mg/dL",
+        "range": (0.1, 15.0),
+        "description": "Serum creatinine",
+        "conversions": {
+            "umol/L": lambda x: x / 88.42,  # umol/L -> mg/dL
+        },
+    },
+    "glucose": {
+        "unit": "mmol/L",
+        "range": (1.0, 40.0),
+        "description": "Fasting glucose",
+        "conversions": {
+            "mg/dL": lambda x: x * 0.05551,  # mg/dL -> mmol/L
+        },
+    },
+    "c_reactive_protein": {
+        "unit": "mg/dL",
+        "range": (0.01, 30.0),
+        "description": "C-reactive protein",
+        "conversions": {
+            "mg/L": lambda x: x / 10.0,  # mg/L -> mg/dL
+            "nmol/L": lambda x: x / 95.24,  # nmol/L -> mg/dL
+        },
+    },
+    "white_blood_cell_count": {
+        "unit": "1000 cells/uL",
+        "range": (1.0, 50.0),
+        "description": "White blood cell count",
+        "conversions": {},
+    },
+    "lymphocyte_percent": {
+        "unit": "%",
+        "range": (1.0, 80.0),
+        "description": "Lymphocyte percentage",
+        "conversions": {},
+    },
+    "red_blood_cell_distribution_width": {
+        "unit": "%",
+        "range": (8.0, 30.0),
+        "description": "Red blood cell distribution width",
+        "conversions": {},
+    },
+    "mean_cell_volume": {
+        "unit": "fL",
+        "range": (50.0, 130.0),
+        "description": "Mean corpuscular volume",
+        "conversions": {},
+    },
+    "alkaline_phosphate": {  # NOTE(review): key says "phosphate" - confirm
+        "unit": "U/L",
+        "range": (10.0, 500.0),
+        "description": "Alkaline phosphatase",
+        "conversions": {},
+    },
+    "hdl_cholesterol": {
+        "unit": "mmol/L",
+        "range": (0.2, 5.0),
+        "description": "HDL cholesterol",
+        "conversions": {
+            "mg/dL": lambda x: x / 38.67,  # mg/dL -> mmol/L
+        },
+    },
+    "hemoglobin": {
+        "unit": "g/dL",
+        "range": (4.0, 22.0),
+        "description": "Hemoglobin",
+        "conversions": {
+            "g/L": lambda x: x / 10.0,  # g/L -> g/dL
+        },
+    },
+    "platelet_count": {
+        "unit": "1000 cells/uL",
+        "range": (10.0, 1000.0),
+        "description": "Platelet count",
+        "conversions": {},
+    },
+    "mean_cell_hemoglobin": {
+        "unit": "pg",
+        "range": (15.0, 45.0),
+        "description": "Mean corpuscular hemoglobin",
+        "conversions": {},
+    },
+    "basophil_percent": {
+        "unit": "%",
+        "range": (0.0, 10.0),
+        "description": "Basophil percentage",
+        "conversions": {},
+    },
+    "lymphocyte_number": {
+        "unit": "1000 cells/uL",
+        "range": (0.1, 20.0),
+        "description": "Lymphocyte count",
+        "conversions": {},
+    },
+    "red_blood_cell_count": {
+        "unit": "million cells/uL",
+        "range": (1.0, 10.0),
+        "description": "Red blood cell count",
+        "conversions": {},
+    },
+}
+
+# Source unit presets for common data sources (biomarker -> reported unit)
+_SOURCE_PRESETS = {
+    "nhanes": {},  # NHANES already uses canonical units after load.py processing
+    "ukbiobank": {
+        "creatinine": "umol/L",
+        "c_reactive_protein": "mg/L",
+        "albumin": "g/L",
+        "hemoglobin": "g/dL",  # UKB field 30020 is reported in g/dL
+        "hdl_cholesterol": "mmol/L",  # UKB field 30760 is reported in mmol/L
+    },
+}
+
+
+class BiomarkerRegistry:
+    """Registry of canonical biomarker definitions for clinical clocks.
+
+    Provides lookup of units, valid ranges, and conversion functions.
+    Pass ``registry`` to use a custom table (an empty one is honoured).
+    """
+
+    def __init__(self, registry=None):
+        self._registry = _REGISTRY if registry is None else registry
+
+    def get(self, name):
+        """Get biomarker definition by canonical name.
+
+        Parameters
+        ----------
+        name : str
+            Canonical biomarker name (e.g. 'albumin', 'creatinine').
+
+        Returns
+        -------
+        dict
+            Biomarker definition with keys: unit, range, description, conversions.
+
+        Raises
+        ------
+        KeyError
+            If the biomarker name is not in the registry.
+        """
+        if name not in self._registry:
+            raise KeyError(
+                f"Unknown biomarker: '{name}'. "
+                f"Known biomarkers: {', '.join(sorted(self._registry.keys()))}"
+            )
+        return self._registry[name]
+
+    def canonical_unit(self, name):
+        """Return the canonical unit for a biomarker."""
+        return self.get(name)["unit"]
+
+    def valid_range(self, name):
+        """Return the (min, max) valid range for a biomarker."""
+        return self.get(name)["range"]
+
+    def known_biomarkers(self):
+        """Return sorted list of all known biomarker names."""
+        return sorted(self._registry.keys())
+
+    def get_source_preset(self, source_name):
+        """Return a copy of the unit mapping for a named data source.
+
+        Parameters
+        ----------
+        source_name : str
+            Preset name, e.g. 'ukbiobank'. Unknown names raise ValueError.
+
+        Returns
+        -------
+        dict
+            Mapping of biomarker name to source unit string.
+        """
+        if source_name not in _SOURCE_PRESETS:
+            raise ValueError(
+                f"Unknown source preset: '{source_name}'. "
+                f"Known presets: {', '.join(sorted(_SOURCE_PRESETS.keys()))}"
+            )
+        return dict(_SOURCE_PRESETS[source_name])  # shield the shared table
+
+    def __contains__(self, name):
+        return name in self._registry
+
+    def __len__(self):
+        return len(self._registry)
+
+
+BIOMARKER_REGISTRY = BiomarkerRegistry()
diff --git a/biolearn/data_library.py b/biolearn/data_library.py
index dc3eeae..4f2f034 100644
--- a/biolearn/data_library.py
+++ b/biolearn/data_library.py
@@ -183,6 +183,7 @@ def __init__(
         rna=None,
         protein_alamar=None,
         protein_olink=None,
+        clinical=None,
     ):
         """
         Initializes the GeoData instance.
@@ -190,12 +191,15 @@ def __init__(
         Args:
             metadata (DataFrame): Metadata associated with genomic samples.
             dnam (DataFrame): Methylation data associated with genomic samples.
+            clinical (DataFrame): Clinical biomarker data with features as rows
+                and samples as columns (same orientation as dnam).
         """
         self.metadata = metadata
         self.dnam = dnam
         self.rna = rna
         self.protein_alamar = protein_alamar
         self.protein_olink = protein_olink
+        self.clinical = clinical
 
     def _validate_metadata_omics_consistency(self):
         """Validate that metadata exists for all omics samples and vice versa."""
@@ -216,6 +220,9 @@ def _validate_metadata_omics_consistency(self):
         if self.protein_olink is not None:
             omics_samples.update(self.protein_olink.columns)
             omics_types.append("protein_olink")
+        if self.clinical is not None:
+            omics_samples.update(self.clinical.columns)
+            omics_types.append("clinical")
 
         if not omics_samples:
             return
@@ -266,6 +273,11 @@ def copy(self):
                 if self.protein_olink is not None
                 else None
             ),
+            clinical=(
+                self.clinical.copy(deep=True)
+                if self.clinical is not None
+                else None
+            ),
         )
 
     def quality_report(self, sites=None):
@@ -358,6 +370,60 @@ def from_methylation_matrix(cls, matrix):
 
         return cls(metadata, dnam)
 
+    @classmethod
+    def from_clinical_matrix(cls, df, source_units=None, units=None):
+        """Build a GeoData instance from a samples-as-rows clinical table.
+
+        Columns named age, sex, is_dead or months_until_death go to the
+        metadata layer; every other column is treated as a biomarker,
+        unit-converted on request, and transposed to features-as-rows.
+
+        Parameters
+        ----------
+        df : DataFrame
+            DataFrame with samples as rows and biomarkers/metadata as columns.
+            Index should be sample identifiers.
+        source_units : str, optional
+            Named source preset for unit conversion (e.g. 'ukbiobank').
+        units : dict, optional
+            Per-biomarker unit overrides (e.g. ``{"creatinine": "umol/L"}``).
+
+        Returns
+        -------
+        GeoData
+            Instance with clinical and metadata layers populated.
+        """
+        from biolearn.clinical.convert import convert_units, validate_ranges
+
+        table = df.copy()
+        reserved = ["age", "sex", "is_dead", "months_until_death"]
+
+        # Partition the columns into metadata and biomarkers
+        meta_names = [name for name in reserved if name in table.columns]
+        marker_names = [
+            name for name in table.columns if name not in reserved
+        ]
+
+        # No metadata columns -> empty frame that still carries the index
+        if meta_names:
+            metadata = table[meta_names]
+        else:
+            metadata = pd.DataFrame(index=table.index)
+
+        biomarkers = table[marker_names]
+
+        # Conversion is skipped entirely unless a preset or override is given
+        conversion_requested = not (source_units is None and units is None)
+        if conversion_requested:
+            biomarkers = convert_units(
+                biomarkers, source_units=source_units, units=units
+            )
+
+        # Called for its warnings only; the returned counts are discarded
+        validate_ranges(biomarkers, warn=True)
+
+        return cls(metadata=metadata, clinical=biomarkers.T)
+
     def save_csv(self, folder_path, name):
         """
         Saves the GeoData instance to CSV files according to the DNA Methylation Array Data Standard V-2410.
@@ -410,6 +476,9 @@ def save_csv(self, folder_path, name):
                 folder_path, f"{name}_protein_olink.csv"
             )
             self.protein_olink.to_csv(protein_file)
+        if self.clinical is not None:
+            clinical_file = os.path.join(folder_path, f"{name}_clinical.csv")
+            self.clinical.to_csv(clinical_file)
 
     @classmethod
     def load_csv(cls, folder_path, name, series_part="all", validate=True):
@@ -509,12 +578,20 @@ def load_csv(cls, folder_path, name, series_part="all", validate=True):
             else None
         )
 
+        clinical_file = os.path.join(folder_path, f"{name}_clinical.csv")
+        clinical_df = (
+            pd.read_csv(clinical_file, index_col=0, skipinitialspace=True)
+            if os.path.exists(clinical_file)
+            else None
+        )
+
         geodata = cls(
             metadata_df,
             dnam=dnam_df,
             rna=rna_df,
             protein_alamar=protein_alamar_df,
             protein_olink=protein_olink_df,
+            clinical=clinical_df,
         )
 
         if validate and metadata_df is not None:
diff --git a/biolearn/load.py b/biolearn/load.py
index a044c82..e1e7fa6 100644
--- a/biolearn/load.py
+++ b/biolearn/load.py
@@ -208,3 +208,26 @@ def load_nhanes(year):
     )
     df = df.rename({"LB2RDW": "LBXRDW", "LB2WBCSI": "LBXWBCSI"}, axis=1)
     return df
+
+
+def load_nhanes_as_geodata(year):
+    """Wrap the ``load_nhanes(year)`` table in a GeoData object.
+
+    The frame returned by ``load_nhanes`` is handed unchanged to
+    ``GeoData.from_clinical_matrix()``, with no unit preset or
+    per-biomarker unit overrides.
+
+    Parameters
+    ----------
+    year : int
+        NHANES cycle year (2010 or 2012).
+
+    Returns
+    -------
+    GeoData
+        GeoData with ``clinical`` and ``metadata`` layers populated.
+    """
+    # Function-scope import (presumably avoids a circular import - confirm)
+    from biolearn.data_library import GeoData
+
+    return GeoData.from_clinical_matrix(load_nhanes(year))
diff --git a/biolearn/model.py b/biolearn/model.py
index e4edf3a..f53f22f 100644
--- a/biolearn/model.py
+++ b/biolearn/model.py
@@ -1289,6 +1289,13 @@ def from_definition(cls, clock_definition):
             weights_path=weights_path, preprocess_file_path=preprocess_file
         )
 
+    def required_features(self):
+        """Describe the layer, features, and metadata this model needs."""
+        # The features are the entries obtained by iterating
+        # ``self.reference``, copied into a new list.
+        sites = list(self.reference)
+        return dict(layer="dnam", features=sites, metadata=[])
+
     def methylation_sites(self):
         return list(self.reference)
 
@@ -1441,6 +1448,13 @@ def solve_qp(meth_vector, deconv_reference):
         # Return samples as rows to match other model outputs
         return cell_prop_df.T
 
+    def required_features(self):
+        """Describe the layer, features, and metadata this model needs."""
+        # The feature list is the row index of the reference table,
+        # copied into a new list.
+        row_labels = list(self.reference.index)
+        return dict(layer="dnam", features=row_labels, metadata=[])
+
     # returns required methylation sites
     def methylation_sites(self):
         return list(self.reference.index)
@@ -1514,6 +1528,19 @@ def predict(self, geo_data):
         # Return as a DataFrame
         return result.apply(self.transform).to_frame(name="Predicted")
 
+    def required_features(self):
+        """Describe the inputs this model reads from a GeoData object.
+
+        Returns
+        -------
+        dict
+            ``{"layer": str, "features": list, "metadata": list}``
+        """
+        # Every coefficient label except the literal "intercept" row
+        labels = self.coefficients.index
+        features = [label for label in labels if label != "intercept"]
+        return dict(layer="dnam", features=features, metadata=[])
+
     def _validate_required_features(self, matrix_data):
         return
 
@@ -1644,6 +1671,13 @@ def _get_data_matrix(self, geo_data):
         PCs = X_centered.T.dot(rotation)  # (samples × PCs)
         return PCs.T  # (PCs × samples)
 
+    def required_features(self):
+        """Describe the layer, features, and metadata this model needs."""
+        # Reuses methylation_sites() rather than rebuilding the site
+        # list, so both methods report the same features.
+        sites = self.methylation_sites()
+        return dict(layer="dnam", features=sites, metadata=[])
+
     def methylation_sites(self):
         """
         Return the list of required CpG sites.
@@ -1662,6 +1696,12 @@ class LinearTranscriptomicModel(LinearModel):
     def _get_data_matrix(self, geo_data):
         return geo_data.rna
 
+    def required_features(self):
+        """Report the rna layer and every non-intercept coefficient label."""
+        labels = self.coefficients.index
+        features = [label for label in labels if label != "intercept"]
+        return dict(layer="rna", features=features, metadata=[])
+
 
 class GrimageModel:
     def __init__(self, coefficient_file, **details):
@@ -1803,6 +1843,13 @@ def methylation_sites(self):
         unique_vars = set(filtered_df["var"]) - {"Intercept", "Age", "Female"}
         return list(unique_vars)
 
+    def required_features(self):
+        """Describe the layer, features, and metadata this model needs."""
+        # Sites come from methylation_sites(); age and sex are the
+        # metadata fields declared as required.
+        sites = self.methylation_sites()
+        return dict(layer="dnam", features=sites, metadata=["age", "sex"])
+
 
 class LinearMultipartProteomicModel:
     def __init__(
@@ -1868,6 +1915,15 @@ def predict(self, geo_data):
         # Apply transformation to results
         return self.transform(pd.DataFrame(results))
 
+    def required_features(self):
+        """Report the protein_olink layer and the model's unique proteins."""
+        table = self.coefficients
+        # Case-insensitive filter that drops any "intercept" row
+        keep = table["Protein"].str.lower() != "intercept"
+        proteins = list(table.loc[keep, "Protein"].unique())
+        spec = dict(layer="protein_olink", features=proteins, metadata=[])
+        return spec
+
     def methylation_sites(self):
         return []
 
@@ -1931,6 +1987,13 @@ def predict(self, geo_data):
 
         return pred_df
 
+    def required_features(self):
+        """Describe the layer, features, and metadata this model needs."""
+        # Every label in the coefficient index counts as a feature;
+        # the index is copied into a new list.
+        labels = list(self.coefficients.index)
+        return dict(layer="dnam", features=labels, metadata=[])
+
     def methylation_sites(self):
         return list(self.coefficients.index)
 
@@ -1970,6 +2033,13 @@ def predict(self, geo_data):
 
         return pd.DataFrame(vals, index=dnam.columns, columns=["Predicted"])
 
+    def required_features(self):
+        """Describe the layer, features, and metadata this model needs."""
+        # ``CpG_names`` is copied into a new list on every call, so
+        # the caller never receives the attribute itself.
+        cpgs = list(self.CpG_names)
+        return dict(layer="dnam", features=cpgs, metadata=[])
+
     def methylation_sites(self):
         return list(self.CpG_names)
 
@@ -2232,6 +2302,13 @@ def predict(
         except Exception as e:
             raise Exception(f"API error: {str(e)}")
 
+    def required_features(self):
+        """Report the dnam layer, required CpGs, and age/sex metadata."""
+        # Copy so callers cannot mutate ``required_cpgs`` through the
+        # result; an unset or empty value becomes an empty list.
+        cpgs = list(self.required_cpgs) if self.required_cpgs else []
+        return {"layer": "dnam", "features": cpgs, "metadata": ["age", "sex"]}
+
     def methylation_sites(self):
         """Return list of required CpG sites for imputation compatibility."""
         return self.required_cpgs if self.required_cpgs else []
@@ -2283,6 +2360,9 @@ def predict(self, geo_data):
             predictions, index=dnam.columns, columns=["Predicted"]
         )
 
+    def required_features(self):  # "features" is a new list on every call
+        return dict(layer="dnam", features=list(self._sites), metadata=[])
+
     def methylation_sites(self):
         return self._sites
 
@@ -2434,6 +2514,9 @@ def predict(self, geo_data):
             predictions, index=methylation_data.columns, columns=["Predicted"]
         )
 
+    def required_features(self):  # "features" is a new list on every call
+        return dict(layer="dnam", features=list(self.cpg_sites), metadata=[])
+
     def methylation_sites(self):
         """Return list of required CpG sites"""
         return self.cpg_sites
diff --git a/biolearn/test/test_clinical_layer.py b/biolearn/test/test_clinical_layer.py
new file mode 100644
index 0000000..7b77c9d
--- /dev/null
+++ b/biolearn/test/test_clinical_layer.py
@@ -0,0 +1,171 @@
+import os
+import numpy as np
+import pandas as pd
+import pytest
+from biolearn.data_library import GeoData
+
+
+def _make_clinical_df():
+    """Build a three-sample clinical frame (samples as rows)."""
+    columns = {
+        "age": [45, 62, 38],
+        "sex": [1, 0, 1],
+        "albumin": [4.2, 3.8, 4.5],
+        "creatinine": [0.9, 1.1, 0.8],
+        "glucose": [5.1, 6.2, 4.8],
+        "white_blood_cell_count": [6.5, 8.0, 5.5],
+        "lymphocyte_percent": [30.0, 25.0, 35.0],
+        "mean_cell_volume": [88.0, 92.0, 86.0],
+        "red_blood_cell_distribution_width": [12.5, 14.0, 12.0],
+        "alkaline_phosphate": [65.0, 80.0, 55.0],
+    }
+    sample_ids = ["P1", "P2", "P3"]
+    # Two metadata columns (age, sex) followed by eight biomarker columns
+    return pd.DataFrame(columns, index=sample_ids)
+
+
+def test_geodata_with_clinical_layer():
+    """A features-as-rows clinical frame is stored on GeoData as given."""
+    sample_meta = pd.DataFrame({"age": [45, 62]}, index=["P1", "P2"])
+    layer = pd.DataFrame(
+        {"P1": [4.2, 0.9], "P2": [3.8, 1.1]},
+        index=["albumin", "creatinine"],
+    )
+    geo = GeoData(metadata=sample_meta, clinical=layer)
+
+    assert geo.dnam is None
+    assert geo.clinical is not None
+    assert list(geo.clinical.index) == ["albumin", "creatinine"]
+    assert list(geo.clinical.columns) == ["P1", "P2"]
+
+
+def test_geodata_clinical_defaults_to_none():
+    """Omitting ``clinical`` leaves the layer as None."""
+    # Only metadata is supplied; no omics or clinical layer at all
+    geo = GeoData(metadata=pd.DataFrame({"age": [45]}, index=["P1"]))
+    assert geo.clinical is None
+
+
+def test_from_clinical_matrix_basic():
+    """Metadata columns are split off and biomarkers end up as rows."""
+    geo = GeoData.from_clinical_matrix(_make_clinical_df())
+    meta, layer = geo.metadata, geo.clinical
+
+    # age and sex land in metadata, one row per sample
+    assert len(meta) == 3
+    for column in ("age", "sex"):
+        assert column in meta.columns
+
+    # Biomarkers are rows, samples are columns
+    assert layer is not None
+    assert set(layer.columns) == {"P1", "P2", "P3"}
+    for marker in ("albumin", "creatinine"):
+        assert marker in layer.index
+
+    # Metadata names must not leak into the clinical layer
+    for column in ("age", "sex"):
+        assert column not in layer.index
+
+
+def test_from_clinical_matrix_preserves_values():
+    """Cell values are unchanged by the split and transpose."""
+    geo = GeoData.from_clinical_matrix(_make_clinical_df())
+    layer = geo.clinical
+
+    assert layer.loc["albumin", "P1"] == 4.2
+    assert layer.loc["creatinine", "P2"] == 1.1
+    assert geo.metadata.loc["P1", "age"] == 45
+
+
+def test_from_clinical_matrix_no_metadata_cols():
+    """A frame with biomarker columns only yields column-less metadata."""
+    markers_only = pd.DataFrame(
+        {"albumin": [4.2, 3.8], "creatinine": [0.9, 1.1]},
+        index=["P1", "P2"],
+    )
+    geo = GeoData.from_clinical_matrix(markers_only)
+
+    assert geo.clinical is not None
+    assert "albumin" in geo.clinical.index
+    assert len(geo.metadata.columns) == 0
+
+
+def test_from_clinical_matrix_unit_conversion():
+    """A ``units`` override converts creatinine from umol/L to mg/dL."""
+    raw_umol = [79.56, 97.24]
+    frame = pd.DataFrame({"creatinine": raw_umol}, index=["P1", "P2"])
+    overrides = {"creatinine": "umol/L"}
+    geo = GeoData.from_clinical_matrix(frame, units=overrides)
+
+    # Expected value is the umol/L reading divided by 88.42
+    expected = raw_umol[0] / 88.42
+    observed = geo.clinical.loc["creatinine", "P1"]
+    assert abs(observed - expected) < 0.01
+
+
+def test_from_clinical_matrix_source_preset():
+    """The 'ukbiobank' preset converts albumin and creatinine."""
+    ukb_values = {
+        "albumin": [42.0],  # g/L (UK Biobank)
+        "creatinine": [88.42],  # umol/L
+    }
+    frame = pd.DataFrame(ukb_values, index=["P1"])
+    geo = GeoData.from_clinical_matrix(frame, source_units="ukbiobank")
+    layer = geo.clinical
+
+    # (biomarker, expected canonical value):
+    #   albumin 42 g/L -> 4.2 g/dL, creatinine 88.42 umol/L -> 1.0 mg/dL
+    expectations = [("albumin", 4.2), ("creatinine", 1.0)]
+    for marker, expected in expectations:
+        assert abs(layer.loc[marker, "P1"] - expected) < 0.01
+
+
+def test_copy_preserves_clinical():
+    """Editing the original's clinical layer leaves a prior copy intact."""
+    original = GeoData.from_clinical_matrix(_make_clinical_df())
+    duplicate = original.copy()
+    sentinel = -999
+
+    # Overwrite one cell of the original after the copy was taken
+    original.clinical.iloc[0, 0] = sentinel
+
+    # The duplicate must not see the sentinel
+    assert duplicate.clinical.iloc[0, 0] != sentinel
+
+
+def test_save_load_roundtrip_with_clinical(tmp_path):
+    """save_csv writes a clinical file that load_csv reads back intact."""
+    saved = GeoData.from_clinical_matrix(_make_clinical_df())
+    folder = str(tmp_path)
+    saved.save_csv(folder, "test")
+
+    # The clinical layer is written to its own "<name>_clinical.csv" file
+    clinical_path = os.path.join(folder, "test_clinical.csv")
+    assert os.path.exists(clinical_path)
+
+    # Reload with validate=False and compare labels first
+    loaded = GeoData.load_csv(folder, "test", validate=False)
+    assert loaded.clinical is not None
+    assert set(loaded.clinical.index) == set(saved.clinical.index)
+    assert set(loaded.clinical.columns) == set(saved.clinical.columns)
+
+    def _sorted(frame):
+        # Sort both axes so label order cannot cause a spurious mismatch
+        return frame.sort_index(axis=0).sort_index(axis=1)
+
+    pd.testing.assert_frame_equal(
+        _sorted(loaded.clinical), _sorted(saved.clinical), atol=1e-10
+    )
+
+
+def test_validate_metadata_omics_includes_clinical():
+    """A clinical sample lacking metadata triggers the consistency warning."""
+    # P3 is present in the clinical layer but absent from metadata
+    metadata = pd.DataFrame({"age": [45, 62]}, index=["P1", "P2"])
+    layer = pd.DataFrame(
+        {"P1": [4.2], "P2": [3.8], "P3": [4.5]}, index=["albumin"]
+    )
+    geo = GeoData(metadata=metadata, clinical=layer)
+
+    with pytest.warns(UserWarning, match="without metadata"):
+        geo._validate_metadata_omics_consistency()
diff --git a/biolearn/test/test_registry.py b/biolearn/test/test_registry.py
new file mode 100644
index 0000000..cf4b3f4
--- /dev/null
+++ b/biolearn/test/test_registry.py
@@ -0,0 +1,109 @@
+import pandas as pd
+import pytest
+from biolearn.clinical.registry import BIOMARKER_REGISTRY
+from biolearn.clinical.convert import convert_units, validate_ranges
+
+
+class TestBiomarkerRegistry:
+    def test_known_biomarkers_not_empty(self):
+        assert len(BIOMARKER_REGISTRY) >= 1
+
+    def test_get_albumin(self):
+        albumin = BIOMARKER_REGISTRY.get("albumin")
+        assert albumin["unit"] == "g/dL"
+        # The remaining keys only need to exist
+        assert {"range", "conversions"} <= set(albumin)
+
+    def test_get_unknown_raises(self):
+        with pytest.raises(KeyError, match="Unknown biomarker"):
+            BIOMARKER_REGISTRY.get("nonexistent_biomarker")
+
+    def test_canonical_unit(self):
+        for name, unit in (("glucose", "mmol/L"), ("creatinine", "mg/dL")):
+            assert BIOMARKER_REGISTRY.canonical_unit(name) == unit
+
+    def test_valid_range(self):
+        lower, upper = BIOMARKER_REGISTRY.valid_range("albumin")
+        assert upper > lower
+
+    def test_contains(self):
+        known, unknown = "albumin", "fake_marker"
+        assert known in BIOMARKER_REGISTRY and unknown not in BIOMARKER_REGISTRY
+
+    def test_known_biomarkers_list(self):
+        listed = BIOMARKER_REGISTRY.known_biomarkers()
+        assert isinstance(listed, list)
+        assert "albumin" in listed
+        assert sorted(listed) == listed  # names come back sorted
+
+    def test_source_preset_nhanes(self):
+        nhanes = BIOMARKER_REGISTRY.get_source_preset("nhanes")
+        assert isinstance(nhanes, dict)
+
+    def test_source_preset_ukbiobank(self):
+        ukb = BIOMARKER_REGISTRY.get_source_preset("ukbiobank")
+        # .get() fails the comparison when the key is missing
+        assert ukb.get("creatinine") == "umol/L"
+
+    def test_source_preset_unknown_raises(self):
+        with pytest.raises(ValueError, match="Unknown source preset"):
+            BIOMARKER_REGISTRY.get_source_preset("fake_source")
+
+
+class TestConvertUnits:
+    def test_creatinine_umol_to_mg(self):
+        frame = pd.DataFrame({"creatinine": [88.42]}, index=["P1"])
+        converted = convert_units(frame, units={"creatinine": "umol/L"})
+        assert abs(converted.loc["P1", "creatinine"] - 1.0) < 0.01
+
+    def test_albumin_g_per_l_to_g_per_dl(self):
+        frame = pd.DataFrame({"albumin": [42.0]}, index=["P1"])
+        converted = convert_units(frame, units={"albumin": "g/L"})
+        assert abs(converted.loc["P1", "albumin"] - 4.2) < 0.01
+
+    def test_no_conversion_returns_copy(self):
+        frame = pd.DataFrame({"albumin": [4.2]}, index=["P1"])
+        untouched = convert_units(frame)
+        assert untouched is not frame  # should be a different object
+        pd.testing.assert_frame_equal(untouched, frame)
+
+    def test_already_canonical_no_change(self):
+        frame = pd.DataFrame({"creatinine": [1.0]}, index=["P1"])
+        same_unit = convert_units(frame, units={"creatinine": "mg/dL"})
+        assert same_unit.loc["P1", "creatinine"] == 1.0
+
+    def test_unknown_unit_raises(self):
+        frame = pd.DataFrame({"creatinine": [1.0]}, index=["P1"])
+        with pytest.raises(ValueError, match="No conversion"):
+            convert_units(frame, units={"creatinine": "fake_unit"})
+
+    def test_missing_column_skipped(self):
+        frame = pd.DataFrame({"albumin": [4.2]}, index=["P1"])
+        converted = convert_units(frame, units={"creatinine": "umol/L"})
+        assert "albumin" in converted.columns
+
+    def test_source_preset(self):
+        ukb_row = {"creatinine": [88.42], "albumin": [42.0]}
+        frame = pd.DataFrame(ukb_row, index=["P1"])
+        converted = convert_units(frame, source_units="ukbiobank")
+        # 88.42 umol/L -> 1.0 mg/dL and 42 g/L -> 4.2 g/dL
+        assert abs(converted.loc["P1", "creatinine"] - 1.0) < 0.01
+        assert abs(converted.loc["P1", "albumin"] - 4.2) < 0.01
+
+
+class TestValidateRanges:
+    def test_in_range_no_warnings(self):
+        frame = pd.DataFrame({"albumin": [4.0]}, index=["P1"])
+        counts = validate_ranges(frame, warn=False)
+        assert len(counts) == 0
+
+    def test_out_of_range_detected(self):
+        frame = pd.DataFrame({"albumin": [0.1]}, index=["P1"])  # below range
+        counts = validate_ranges(frame, warn=False)
+        assert "albumin" in counts
+        assert counts["albumin"] == 1
+
+    def test_unknown_columns_ignored(self):
+        frame = pd.DataFrame({"unknown_col": [999]}, index=["P1"])
+        counts = validate_ranges(frame, warn=False)
+        assert len(counts) == 0
diff --git a/biolearn/test/test_required_features.py b/biolearn/test/test_required_features.py
new file mode 100644
index 0000000..ae0baba
--- /dev/null
+++ b/biolearn/test/test_required_features.py
@@ -0,0 +1,85 @@
+import pytest
+from biolearn import model
+from biolearn.model_gallery import ModelGallery
+
+gallery = ModelGallery()
+
+
+@pytest.mark.parametrize(
+    "model_name, model_entry", model.model_definitions.items()
+)
+def test_required_features_interface(model_name, model_entry):
+    """required_features() must return a dict with three typed keys."""
+    model_type = model_entry["model"]["type"]
+
+    # Skip types that can't be instantiated without special setup
+    if model_type in ["NotImplemented", "HurdleAPIModel"]:
+        pytest.skip(f"Model type {model_type} requires special setup")
+
+    # Build the model through its own from_definition() factory
+    model_class = getattr(model, model_type)
+    instance = model_class.from_definition(model_entry)
+
+    spec = instance.required_features()
+
+    # Validate return format
+    assert isinstance(
+        spec, dict
+    ), f"{model_name}: required_features() must return a dict"
+
+    # All three keys must be present before their types are checked
+    for key in ("layer", "features", "metadata"):
+        assert (
+            key in spec
+        ), f"{model_name}: required_features() must include '{key}'"
+
+    # key -> (expected type, type name used in the failure message)
+    expected_types = {
+        "layer": (str, "string"),
+        "features": (list, "list"),
+        "metadata": (list, "list"),
+    }
+    for key, (kind, label) in expected_types.items():
+        assert isinstance(
+            spec[key], kind
+        ), f"{model_name}: '{key}' must be a {label}"
+
+    # Layer should be one of the known GeoData layers
+    valid_layers = {
+        "dnam",
+        "rna",
+        "protein_alamar",
+        "protein_olink",
+        "clinical",
+    }
+    layer = spec["layer"]
+    assert (
+        layer in valid_layers
+    ), f"{model_name}: 'layer' must be one of {valid_layers}, got '{layer}'"
+
+
+@pytest.mark.parametrize(
+    "model_name, model_entry", model.model_definitions.items()
+)
+def test_required_features_consistency_with_methylation_sites(
+    model_name, model_entry
+):
+    """dnam models must list the same features as methylation_sites()."""
+    model_type = model_entry["model"]["type"]
+    if model_type in ["NotImplemented", "HurdleAPIModel"]:
+        pytest.skip(f"Model type {model_type} requires special setup")
+
+    instance = getattr(model, model_type).from_definition(model_entry)
+    if not hasattr(instance, "methylation_sites"):
+        pytest.skip(f"{model_name} does not have methylation_sites()")
+
+    spec = instance.required_features()
+    sites = instance.methylation_sites()
+
+    # Only dnam models that report at least one site are compared;
+    # order and duplicates are ignored by comparing as sets.
+    if spec["layer"] != "dnam" or not sites:
+        return
+    assert set(spec["features"]) == set(
+        sites
+    ), f"{model_name}: required_features() features should match methylation_sites()"