diff --git a/src/mcp_tools/pipelines/anova.py b/src/mcp_tools/pipelines/anova.py index 7e5fb98..1b7fc77 100644 --- a/src/mcp_tools/pipelines/anova.py +++ b/src/mcp_tools/pipelines/anova.py @@ -4,7 +4,11 @@ from md_python.models.dataset_builders import MinimalDataset from md_python.models.dataset_builders._methods import ( + _APEGLM_SEED_RANGE, _DE_METHODS_PER_ENTITY, + _DESEQ2_ALPHA_RANGE, + _DESEQ2_LFC_SHRINKAGE, + _EDGER_NORM_METHODS, _ENTITY_TYPES, _de_method_key, ) @@ -196,26 +200,28 @@ def run_anova( f"Allowed: {sorted(allowed_de)}" ) if de_method == "edgeR": - if edger_norm_method not in {"TMM", "RLE", "upperquartile", "none"}: + if edger_norm_method not in _EDGER_NORM_METHODS: raise ValueError( - "edger_norm_method must be one of: TMM, RLE, upperquartile, none " - f"(got '{edger_norm_method}')" + "edger_norm_method must be one of: " + f"{sorted(_EDGER_NORM_METHODS)} (got '{edger_norm_method}')" ) if de_method == "DESeq2": - if deseq2_lfc_shrinkage not in {"none", "apeglm", "ashr", "normal"}: + if deseq2_lfc_shrinkage not in _DESEQ2_LFC_SHRINKAGE: raise ValueError( - "deseq2_lfc_shrinkage must be one of: none, apeglm, ashr, normal " - f"(got '{deseq2_lfc_shrinkage}')" + "deseq2_lfc_shrinkage must be one of: " + f"{sorted(_DESEQ2_LFC_SHRINKAGE)} (got '{deseq2_lfc_shrinkage}')" ) - if not 0.0 <= deseq2_alpha <= 1.0: + _alpha_lo, _alpha_hi = _DESEQ2_ALPHA_RANGE + if not _alpha_lo <= deseq2_alpha <= _alpha_hi: raise ValueError( - "deseq2_alpha must be between 0 and 1 " + f"deseq2_alpha must be between {_alpha_lo} and {_alpha_hi} " f"(got {deseq2_alpha})" ) if deseq2_lfc_shrinkage == "apeglm": - if not 0 <= apeglm_seed <= 2147483647: + _seed_lo, _seed_hi = _APEGLM_SEED_RANGE + if not _seed_lo <= apeglm_seed <= _seed_hi: raise ValueError( - "apeglm_seed must be between 0 and 2147483647 " + f"apeglm_seed must be between {_seed_lo} and {_seed_hi} " f"(got {apeglm_seed})" ) diff --git a/src/mcp_tools/pipelines/pairwise.py b/src/mcp_tools/pipelines/pairwise.py index ee84c98..9da60f8 100644 --- a/src/mcp_tools/pipelines/pairwise.py +++ b/src/mcp_tools/pipelines/pairwise.py @@ -5,6 +5,13 @@ from typing import Any, Dict, List, Optional from md_python.models.dataset_builders import PairwiseComparisonDataset +from md_python.models.dataset_builders._methods import ( + _APEGLM_SEED_RANGE, + _DE_METHODS_PER_ENTITY, + _DESEQ2_ALPHA_RANGE, + _DESEQ2_LFC_SHRINKAGE, + _EDGER_NORM_METHODS, +) from md_python.models.metadata import SampleMetadata from .. import mcp @@ -227,6 +234,49 @@ def run_pairwise_comparison( "matches what the server actually does. Re-confirm with the user.\n\n" ) + # DE method gating. Only entity_type='gene' accepts edgeR / DESeq2; + # protein/peptide/metabolite/ptm are limma-only. Validate here so an invalid + # combo fails fast with a clear message instead of being rejected downstream + # by the server. (run_pairwise_comparison_bulk routes through this function, + # so it inherits the gate.) Allowed values come from _methods.py; this block + # is intentionally kept separate from anova.py's so the two can diverge. + allowed_de = _DE_METHODS_PER_ENTITY.get(entity_type) + if allowed_de is None: + raise ValueError( + f"unknown entity_type '{entity_type}'. " + f"Allowed: {sorted(_DE_METHODS_PER_ENTITY)}" + ) + if de_method not in allowed_de: + raise ValueError( + f"de_method '{de_method}' not allowed for entity_type='{entity_type}'. " + f"Allowed: {sorted(allowed_de)}" + ) + if de_method == "edgeR": + if edger_norm_method not in _EDGER_NORM_METHODS: + raise ValueError( + "edger_norm_method must be one of: " + f"{sorted(_EDGER_NORM_METHODS)} (got '{edger_norm_method}')" + ) + if de_method == "DESeq2": + if deseq2_lfc_shrinkage not in _DESEQ2_LFC_SHRINKAGE: + raise ValueError( + "deseq2_lfc_shrinkage must be one of: " + f"{sorted(_DESEQ2_LFC_SHRINKAGE)} (got '{deseq2_lfc_shrinkage}')" + ) + _alpha_lo, _alpha_hi = _DESEQ2_ALPHA_RANGE + if not _alpha_lo <= deseq2_alpha <= _alpha_hi: + raise ValueError( + f"deseq2_alpha must be between {_alpha_lo} and {_alpha_hi} " + f"(got {deseq2_alpha})" + ) + if deseq2_lfc_shrinkage == "apeglm": + _seed_lo, _seed_hi = _APEGLM_SEED_RANGE + if not _seed_lo <= apeglm_seed <= _seed_hi: + raise ValueError( + f"apeglm_seed must be between {_seed_lo} and {_seed_hi} " + f"(got {apeglm_seed})" + ) + dataset_id = PairwiseComparisonDataset( input_dataset_ids=input_dataset_ids, dataset_name=dataset_name, diff --git a/src/md_python/models/dataset_builders/_methods.py b/src/md_python/models/dataset_builders/_methods.py index 5c64357..82536f8 100644 --- a/src/md_python/models/dataset_builders/_methods.py +++ b/src/md_python/models/dataset_builders/_methods.py @@ -133,6 +133,25 @@ def _batch_correction_technique_key(entity_type: str) -> str: "ptm": frozenset({"limma"}), } +# Companion-parameter vocabularies for the gene DE engines — the allowed values +# edgeR / DESeq2 accept. Mirror of MDFlexiComparisons process_r.py on +# 2026-05-27: the ``edger_norm_method`` / ``deseq2_lfc_shrinkage`` Literals and +# the ``deseq2_alpha`` / ``apeglm_seed`` numberrange ge/le bounds. +# +# These are the SINGLE source for the allowed values, so a sync check only has +# to compare this file against process_r.py. The per-tool gating in +# ``pairwise.py`` and ``anova.py`` reads these constants but keeps its own +# validation block — the two tools can validate differently or be updated in +# different orders without coupling. +_EDGER_NORM_METHODS: frozenset[str] = frozenset( + {"TMM", "RLE", "upperquartile", "none"} +) +_DESEQ2_LFC_SHRINKAGE: frozenset[str] = frozenset( + {"none", "apeglm", "ashr", "normal"} +) +_DESEQ2_ALPHA_RANGE: tuple[float, float] = (0.0, 1.0) # (ge, le) +_APEGLM_SEED_RANGE: tuple[int, int] = (0, 2147483647) # (ge, le) + def _de_method_key(entity_type: str) -> str: """Wire-format key for the per-entity de_method field.