Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions src/mcp_tools/pipelines/anova.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@

from md_python.models.dataset_builders import MinimalDataset
from md_python.models.dataset_builders._methods import (
_APEGLM_SEED_RANGE,
_DE_METHODS_PER_ENTITY,
_DESEQ2_ALPHA_RANGE,
_DESEQ2_LFC_SHRINKAGE,
_EDGER_NORM_METHODS,
_ENTITY_TYPES,
_de_method_key,
)
Expand Down Expand Up @@ -196,26 +200,28 @@ def run_anova(
f"Allowed: {sorted(allowed_de)}"
)
if de_method == "edgeR":
if edger_norm_method not in {"TMM", "RLE", "upperquartile", "none"}:
if edger_norm_method not in _EDGER_NORM_METHODS:
raise ValueError(
"edger_norm_method must be one of: TMM, RLE, upperquartile, none "
f"(got '{edger_norm_method}')"
"edger_norm_method must be one of: "
f"{sorted(_EDGER_NORM_METHODS)} (got '{edger_norm_method}')"
)
if de_method == "DESeq2":
if deseq2_lfc_shrinkage not in {"none", "apeglm", "ashr", "normal"}:
if deseq2_lfc_shrinkage not in _DESEQ2_LFC_SHRINKAGE:
raise ValueError(
"deseq2_lfc_shrinkage must be one of: none, apeglm, ashr, normal "
f"(got '{deseq2_lfc_shrinkage}')"
"deseq2_lfc_shrinkage must be one of: "
f"{sorted(_DESEQ2_LFC_SHRINKAGE)} (got '{deseq2_lfc_shrinkage}')"
)
if not 0.0 <= deseq2_alpha <= 1.0:
_alpha_lo, _alpha_hi = _DESEQ2_ALPHA_RANGE
if not _alpha_lo <= deseq2_alpha <= _alpha_hi:
raise ValueError(
"deseq2_alpha must be between 0 and 1 "
f"deseq2_alpha must be between {_alpha_lo} and {_alpha_hi} "
f"(got {deseq2_alpha})"
)
if deseq2_lfc_shrinkage == "apeglm":
if not 0 <= apeglm_seed <= 2147483647:
_seed_lo, _seed_hi = _APEGLM_SEED_RANGE
if not _seed_lo <= apeglm_seed <= _seed_hi:
raise ValueError(
"apeglm_seed must be between 0 and 2147483647 "
f"apeglm_seed must be between {_seed_lo} and {_seed_hi} "
f"(got {apeglm_seed})"
)

Expand Down
50 changes: 50 additions & 0 deletions src/mcp_tools/pipelines/pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
from typing import Any, Dict, List, Optional

from md_python.models.dataset_builders import PairwiseComparisonDataset
from md_python.models.dataset_builders._methods import (
_APEGLM_SEED_RANGE,
_DE_METHODS_PER_ENTITY,
_DESEQ2_ALPHA_RANGE,
_DESEQ2_LFC_SHRINKAGE,
_EDGER_NORM_METHODS,
)
from md_python.models.metadata import SampleMetadata

from .. import mcp
Expand Down Expand Up @@ -227,6 +234,49 @@ def run_pairwise_comparison(
"matches what the server actually does. Re-confirm with the user.\n\n"
)

# DE method gating. Only entity_type='gene' accepts edgeR / DESeq2;
# protein/peptide/metabolite/ptm are limma-only. Validate here so an invalid
# combo fails fast with a clear message instead of being rejected downstream
# by the server. (run_pairwise_comparison_bulk routes through this function,
# so it inherits the gate.) Allowed values come from _methods.py; this block
# is intentionally kept separate from anova.py's so the two can diverge.
allowed_de = _DE_METHODS_PER_ENTITY.get(entity_type)
if allowed_de is None:
raise ValueError(
f"unknown entity_type '{entity_type}'. "
f"Allowed: {sorted(_DE_METHODS_PER_ENTITY)}"
)
if de_method not in allowed_de:
raise ValueError(
f"de_method '{de_method}' not allowed for entity_type='{entity_type}'. "
f"Allowed: {sorted(allowed_de)}"
)
if de_method == "edgeR":
if edger_norm_method not in _EDGER_NORM_METHODS:
raise ValueError(
"edger_norm_method must be one of: "
f"{sorted(_EDGER_NORM_METHODS)} (got '{edger_norm_method}')"
)
if de_method == "DESeq2":
if deseq2_lfc_shrinkage not in _DESEQ2_LFC_SHRINKAGE:
raise ValueError(
"deseq2_lfc_shrinkage must be one of: "
f"{sorted(_DESEQ2_LFC_SHRINKAGE)} (got '{deseq2_lfc_shrinkage}')"
)
_alpha_lo, _alpha_hi = _DESEQ2_ALPHA_RANGE
if not _alpha_lo <= deseq2_alpha <= _alpha_hi:
raise ValueError(
f"deseq2_alpha must be between {_alpha_lo} and {_alpha_hi} "
f"(got {deseq2_alpha})"
)
if deseq2_lfc_shrinkage == "apeglm":
_seed_lo, _seed_hi = _APEGLM_SEED_RANGE
if not _seed_lo <= apeglm_seed <= _seed_hi:
raise ValueError(
f"apeglm_seed must be between {_seed_lo} and {_seed_hi} "
f"(got {apeglm_seed})"
)

dataset_id = PairwiseComparisonDataset(
input_dataset_ids=input_dataset_ids,
dataset_name=dataset_name,
Expand Down
19 changes: 19 additions & 0 deletions src/md_python/models/dataset_builders/_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,25 @@ def _batch_correction_technique_key(entity_type: str) -> str:
"ptm": frozenset({"limma"}),
}

# Companion-parameter vocabularies for the gene DE engines — the allowed values
# edgeR / DESeq2 accept. Mirror of MDFlexiComparisons process_r.py as of
# 2026-05-27: the ``edger_norm_method`` / ``deseq2_lfc_shrinkage`` Literals and
# the ``deseq2_alpha`` / ``apeglm_seed`` numeric-range ``ge``/``le`` bounds.
#
# This is the SINGLE source for the allowed values, so a sync check only has
# to compare this file against process_r.py. The per-tool gating in
# ``pairwise.py`` and ``anova.py`` reads these constants but keeps its own
# validation block — the two tools can validate differently or be updated in
# different orders without coupling.
# Values accepted for ``edger_norm_method`` when de_method is "edgeR".
_EDGER_NORM_METHODS: frozenset[str] = frozenset(
    ("TMM", "RLE", "upperquartile", "none")
)
# Values accepted for ``deseq2_lfc_shrinkage`` when de_method is "DESeq2".
_DESEQ2_LFC_SHRINKAGE: frozenset[str] = frozenset(
    ("none", "apeglm", "ashr", "normal")
)
# Inclusive (lower, upper) bounds — i.e. (ge, le) — for ``deseq2_alpha``.
_DESEQ2_ALPHA_RANGE: tuple[float, float] = (0.0, 1.0)
# Inclusive (lower, upper) bounds — i.e. (ge, le) — for ``apeglm_seed``.
_APEGLM_SEED_RANGE: tuple[int, int] = (0, 2_147_483_647)


def _de_method_key(entity_type: str) -> str:
"""Wire-format key for the per-entity de_method field.
Expand Down