From b1e49ff94171c3b9c5105291ec147c4d06e72ee5 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 30 Jun 2026 03:41:05 +0000
Subject: [PATCH 1/9] Add target projection and selectivity ratio PLS
 diagnostics

Two predictive-importance diagnostics next to vip(): target_projection()
forms the single latent component along a response's regression vector
(Kvalheim and Karstang 1989), and selectivity_ratio() reports each
feature's explained-to-residual variance ratio on that component
(Rajalahti et al. 2009). Both are bound as PLS convenience methods and
exported from multivariate.methods. The selectivity ratio ranks
predictive relevance but, by design, gives near-identical scores to two
collinear features, so it does not break ties within a collinear group.
---
 .../multivariate/_diagnostics.py              | 235 ++++++++++++++++++
 src/process_improve/multivariate/_pls.py      |   9 +
 src/process_improve/multivariate/methods.py   |   4 +
 3 files changed, 248 insertions(+)
diff --git a/src/process_improve/multivariate/_diagnostics.py b/src/process_improve/multivariate/_diagnostics.py
index c008c00f..1f7359b8 100644
--- a/src/process_improve/multivariate/_diagnostics.py
+++ b/src/process_improve/multivariate/_diagnostics.py
@@ -14,7 +14,9 @@
 
 import numpy as np
 import pandas as pd
+from scipy.stats import f as f_dist
 from sklearn.base import BaseEstimator
+from sklearn.utils import Bunch
 
 from ._common import DataMatrix, _align_to_fit_features, epsqrt
 from ._preprocessing import center
@@ -104,6 +106,239 @@ def vip(model: BaseEstimator, n_components: int | None = None) -> pd.Series:
     return pd.Series(vip_values, index=weights.index, name="VIP")
 
 
+def _select_response(beta: pd.DataFrame, response: str | int | None) -> str:
+    """Resolve a response selector to a single column label of ``beta``."""
+    columns = list(beta.columns)
+    if response is None:
+        if len(columns) != 1:
+            msg = (
+                "This model has several responses "
+                f"({columns}); pass response=<name> to pick one."
+            )
+            raise ValueError(msg)
+        return columns[0]
+    if isinstance(response, (int, np.integer)) and response not in columns:
+        if not (0 <= int(response) < len(columns)):
+            msg = f"response index {response} is out of range for {len(columns)} responses."
+            raise ValueError(msg)
+        return columns[int(response)]
+    if response not in columns:
+        msg = f"response {response!r} is not one of the model responses {columns}."
+        raise ValueError(msg)
+    return response  # type: ignore[return-value]
+
+
+def _target_projection_arrays(
+    model: BaseEstimator, X: DataMatrix, response: str | int | None
+) -> tuple[pd.DataFrame, np.ndarray, np.ndarray, np.ndarray, str]:
+    """Return ``(X_aligned, t_tp, p_tp, w_tp, response_label)`` for the TP component.
+
+    The target-projection direction is the (unit-normalised) regression vector
+    ``b`` for the chosen response, read from ``model.beta_coefficients_``; the
+    scores are ``t = X w_tp`` and the loadings ``p = X^T t / (t^T t)``. ``X``
+    must be preprocessed the same way as the training data (the same convention
+    as :func:`t2_contributions` / :func:`spe_contributions`).
+    """
+    if not hasattr(model, "beta_coefficients_"):
+        msg = "Model is not fitted, or is not a PLS model with 'beta_coefficients_'."
+        raise ValueError(msg)
+    beta = model.beta_coefficients_
+    label = _select_response(beta, response)
+    b = beta[label].to_numpy(dtype=float)
+    norm_b = float(np.sqrt(b @ b))
+    if norm_b <= epsqrt:
+        msg = f"The regression vector for response {label!r} is ~0; it is not predicted by X."
+        raise ValueError(msg)
+    w_tp = b / norm_b
+
+    if not isinstance(X, pd.DataFrame):
+        X = pd.DataFrame(X)
+    X = _align_to_fit_features(X, beta.index)
+    X_values = X.to_numpy(dtype=float)
+    t_tp = X_values @ w_tp
+    ttt = float(t_tp @ t_tp)
+    if ttt <= epsqrt:
+        msg = "The target-projected scores have ~0 variance; cannot form the TP loading."
+        raise ValueError(msg)
+    p_tp = (X_values.T @ t_tp) / ttt
+    return X, t_tp, p_tp, w_tp, label
+
+
+def target_projection(model: BaseEstimator, X: DataMatrix, response: str | int | None = None) -> Bunch:
+    r"""Target-projected (TP) component of a fitted PLS model for one response.
+
+    Target projection (Kvalheim and Karstang, 1989) rotates the PLS solution so
+    that a *single* latent component carries all of the predictive information
+    for one response. The component points along the regression vector
+    :math:`b` (the column of ``beta_coefficients_`` for that response):
+
+    .. math::
+
+        w_{\text{TP}} = \frac{b}{\lVert b \rVert}, \qquad
+        t_{\text{TP}} = X\, w_{\text{TP}}, \qquad
+        p_{\text{TP}} = \frac{X^\top t_{\text{TP}}}{t_{\text{TP}}^\top t_{\text{TP}}}.
+
+    The TP component is the basis for the selectivity ratio
+    (:func:`selectivity_ratio`).
+
+    Parameters
+    ----------
+    model : PLS
+        A fitted PLS model (must expose ``beta_coefficients_``).
+    X : array-like of shape (n_samples, n_features)
+        Preprocessed data, scaled the same way as the training data (for
+        example with :class:`MCUVScaler`).
+    response : str or int or None, default=None
+        Which response (Y column) to project onto. ``None`` is allowed only for
+        a single-response model; otherwise pass the response label (or its
+        integer position).
+
+    Returns
+    -------
+    sklearn.utils.Bunch
+        With fields ``scores`` (pd.Series, the TP scores per sample),
+        ``loadings`` (pd.Series, the TP loading per feature), ``weights``
+        (pd.Series, the unit TP weight per feature) and ``response`` (the
+        resolved response label).
+
+    Raises
+    ------
+    ValueError
+        If the model is not a fitted PLS, the response selector is invalid, or
+        the regression vector / TP scores are degenerate (~0).
+
+    References
+    ----------
+    Kvalheim, O. M. and Karstang, T. V. (1989). Interpretation of latent-variable
+    regression models. *Chemometrics and Intelligent Laboratory Systems*, 7(1-2),
+    39-51.
+
+    Examples
+    --------
+    >>> pls = PLS(n_components=3).fit(X_scaled, y_scaled)
+    >>> tp = pls.target_projection(X_scaled)        # bound convenience method
+    >>> tp.scores.head()
+
+    See Also
+    --------
+    selectivity_ratio : Per-variable explained/residual ratio on the TP component.
+    """
+    X_df, t_tp, p_tp, w_tp, label = _target_projection_arrays(model, X, response)
+    return Bunch(
+        scores=pd.Series(t_tp, index=X_df.index, name="TP score"),
+        loadings=pd.Series(p_tp, index=X_df.columns, name="TP loading"),
+        weights=pd.Series(w_tp, index=X_df.columns, name="TP weight"),
+        response=label,
+    )
+
+
+def _selectivity_ratio_one(
+    model: BaseEstimator, X: DataMatrix, response: str | int | None, conf_level: float
+) -> pd.Series:
+    """Compute the selectivity ratio per feature for a single response (public-API helper)."""
+    X_df, t_tp, p_tp, _w_tp, label = _target_projection_arrays(model, X, response)
+    X_values = X_df.to_numpy(dtype=float)
+    ttt = float(t_tp @ t_tp)
+    ss_explained = (p_tp**2) * ttt  # per-feature explained sum of squares on the TP component
+    residuals = X_values - np.outer(t_tp, p_tp)
+    ss_residual = (residuals**2).sum(axis=0)
+    with np.errstate(invalid="ignore", divide="ignore"):
+        sr = ss_explained / ss_residual
+    sr[~np.isfinite(sr)] = 0.0  # a feature with no residual (or no signal) gets 0 here
+
+    series = pd.Series(sr, index=X_df.columns, name="selectivity_ratio")
+    # F-based critical value (Rajalahti et al., 2009): SR_j above this is
+    # "significant" at conf_level, with N-2 and N-3 degrees of freedom. The
+    # sensory layer prefers a permutation test, so this is advisory metadata.
+    n_samples = X_values.shape[0]
+    if n_samples > 3:
+        series.attrs["f_critical"] = float(f_dist.ppf(conf_level, dfn=n_samples - 2, dfd=n_samples - 3))
+    else:
+        series.attrs["f_critical"] = float("nan")
+    series.attrs["conf_level"] = float(conf_level)
+    series.attrs["response"] = label
+    return series
+
+
+def selectivity_ratio(
+    model: BaseEstimator,
+    X: DataMatrix,
+    response: str | int | None = None,
+    *,
+    conf_level: float = 0.95,
+) -> pd.Series | pd.DataFrame:
+    r"""Compute the selectivity ratio of each feature on the target-projected component.
+
+    The selectivity ratio (Rajalahti et al., 2009) ranks each feature by how
+    much of its variance the *predictive* (target-projected) direction explains.
+    On the TP component (:func:`target_projection`), for feature :math:`j`:
+
+    .. math::
+
+        \text{SR}_j = \frac{\text{SS}_{\text{explained},j}}
+                           {\text{SS}_{\text{residual},j}}
+                    = \frac{p_{\text{TP},j}^2\, (t_{\text{TP}}^\top t_{\text{TP}})}
+                           {\sum_i (x_{ij} - t_{\text{TP},i}\, p_{\text{TP},j})^2}.
+
+    A large SR means the feature is well aligned with the predictive direction.
+    Unlike VIP, it is a true explained/residual variance ratio and can be
+    compared against an F distribution. Note that two collinear features carry
+    near-identical SR: the selectivity ratio ranks predictive relevance, it does
+    not break ties between mutually collinear features.
+
+    Parameters
+    ----------
+    model : PLS
+        A fitted PLS model (must expose ``beta_coefficients_``).
+    X : array-like of shape (n_samples, n_features)
+        Preprocessed data, scaled the same way as the training data (for
+        example with :class:`MCUVScaler`).
+    response : str or int or None, default=None
+        Which response to compute SR for. ``None`` returns a feature-by-response
+        DataFrame when the model has several responses, or a Series for a
+        single-response model.
+    conf_level : float, default=0.95
+        Confidence level for the advisory F-based critical value, attached to
+        the result's ``.attrs["f_critical"]``.
+
+    Returns
+    -------
+    pd.Series or pd.DataFrame
+        Selectivity ratios indexed by feature. A Series for one response (with
+        ``f_critical`` / ``conf_level`` / ``response`` in ``.attrs``), or a
+        feature-by-response DataFrame when ``response`` is ``None`` and the
+        model has several responses.
+
+    Raises
+    ------
+    ValueError
+        If the model is not a fitted PLS or the response selector is invalid.
+
+    References
+    ----------
+    Rajalahti, T., Arneberg, R., Berven, F. S., Myhr, K.-M., Ulvik, R. J. and
+    Kvalheim, O. M. (2009). Biomarker discovery in mass spectral profiles by
+    means of selectivity ratio plot. *Chemometrics and Intelligent Laboratory
+    Systems*, 95(1), 35-48.
+
+    Examples
+    --------
+    >>> pls = PLS(n_components=3).fit(X_scaled, y_scaled)
+    >>> pls.selectivity_ratio(X_scaled).sort_values(ascending=False).head()
+
+    See Also
+    --------
+    target_projection : The target-projected component the ratio is built on.
+    vip : Variable Importance in Projection, an alternative importance measure.
+    """
+    beta = getattr(model, "beta_coefficients_", None)
+    if response is None and beta is not None and beta.shape[1] > 1:
+        return pd.DataFrame(
+            {col: _selectivity_ratio_one(model, X, col, conf_level) for col in beta.columns}
+        )
+    return _selectivity_ratio_one(model, X, response, conf_level)
+
+
 def squared_cosine(model: BaseEstimator, n_components: int | None = None) -> pd.DataFrame:
     r"""Calculate the squared cosine (cos2): quality of representation of observations.
 
diff --git a/src/process_improve/multivariate/_pls.py b/src/process_improve/multivariate/_pls.py
index 5e03d112..8c160a12 100644
--- a/src/process_improve/multivariate/_pls.py
+++ b/src/process_improve/multivariate/_pls.py
@@ -37,6 +37,12 @@
     _select_n_components,
     epsqrt,
 )
+from ._diagnostics import (
+    selectivity_ratio as _selectivity_ratio,
+)
+from ._diagnostics import (
+    target_projection as _target_projection,
+)
 from ._nipals import quick_regress, ssq, terminate_check
 from ._preprocessing import MCUVScaler
 from .plots import (
@@ -285,6 +291,9 @@ def get_feature_names_out(self, input_features=None) -> np.ndarray:  # noqa: ANN
     # supplies its own rename map.
     predictions_vs_observed_plot = _model_method(_predictions_vs_observed_plot)
     coefficient_plot = _model_method(_coefficient_plot)
+    # PLS-specific predictive-importance diagnostics (need beta_coefficients_).
+    target_projection = _model_method(_target_projection)
+    selectivity_ratio = _model_method(_selectivity_ratio)
 
     _ATTRIBUTE_RENAMES: typing.ClassVar[dict[str, str]] = {
         "x_scores": "scores_",
diff --git a/src/process_improve/multivariate/methods.py b/src/process_improve/multivariate/methods.py
index c2ca5676..5e756c4a 100644
--- a/src/process_improve/multivariate/methods.py
+++ b/src/process_improve/multivariate/methods.py
@@ -23,9 +23,11 @@
     project_variables,
     rv2_coefficient,
     rv_coefficient,
+    selectivity_ratio,
     spe_contributions,
     squared_cosine,
     t2_contributions,
+    target_projection,
     vip,
 )
 from ._limits import (
@@ -100,6 +102,7 @@
     "scale",
     "score_limit",
     "score_plot",
+    "selectivity_ratio",
     "spe_calculation",
     "spe_contributions",
     "spe_limit",
@@ -108,6 +111,7 @@
     "ssq",
     "t2_contributions",
     "t2_plot",
+    "target_projection",
     "terminate_check",
     "vip",
 ]

From 4023d2edaf253151f81727e9352053417217033f Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 30 Jun 2026 03:45:04 +0000
Subject: [PATCH 2/9] Test target projection and selectivity ratio

Synthetic driver/proxy/noise block: SR is high for the predictive
direction (driver and its collinear proxy), low for noise, and gives
near-identical scores to the two collinear features (the limitation the
discriminator narrative rests on). TP scores correlate with the
response. Adds multi-response, error-path, and LDPE real-data checks.
---
 tests/test_multivariate.py | 104 +++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)

diff --git a/tests/test_multivariate.py b/tests/test_multivariate.py
index b7f88805..bc9eff39 100644
--- a/tests/test_multivariate.py
+++ b/tests/test_multivariate.py
@@ -45,11 +45,13 @@
     rv_coefficient,
     scale,
     score_limit,
+    selectivity_ratio,
     spe_calculation,
     spe_contributions,
     squared_cosine,
     ssq,
     t2_contributions,
+    target_projection,
     terminate_check,
     vip,
 )
@@ -2226,6 +2228,108 @@ def test_vip_unfitted_raises() -> None:
         vip(pca_unfitted)
 
 
+def _driver_proxy_noise(seed: int = 0, n: int = 40) -> tuple[pd.DataFrame, pd.Series]:
+    """Synthetic block: a driver of y, a collinear proxy of the driver, and pure noise."""
+    rng = np.random.default_rng(seed)
+    driver = rng.standard_normal(n)
+    X = pd.DataFrame(
+        {
+            "driver": driver,
+            "proxy": driver + 0.02 * rng.standard_normal(n),  # collinear with driver
+            "noise1": rng.standard_normal(n),
+            "noise2": rng.standard_normal(n),
+        }
+    )
+    y = pd.Series(2.0 * driver + 0.3 * rng.standard_normal(n), name="y")
+    return X, y
+
+
+def test_selectivity_ratio_ranks_driver_and_proxy_above_noise() -> None:
+    """SR is high for the predictive direction (driver and its collinear proxy), low for noise."""
+    X, y = _driver_proxy_noise()
+    Xs = MCUVScaler().fit_transform(X)
+    ys = MCUVScaler().fit_transform(y.to_frame())
+    pls = PLS(n_components=2).fit(Xs, ys)
+
+    sr_standalone = selectivity_ratio(pls, Xs)
+    sr_bound = pls.selectivity_ratio(Xs)
+    assert sr_standalone.equals(sr_bound)
+
+    assert isinstance(sr_standalone, pd.Series)
+    assert sr_standalone.name == "selectivity_ratio"
+    assert list(sr_standalone.index) == list(X.columns)
+    assert (sr_standalone >= 0).all()
+    assert np.isfinite(sr_standalone).all()
+
+    # Driver and its collinear proxy carry the predictive signal; noise does not.
+    assert sr_standalone["driver"] > 1.0
+    assert sr_standalone["proxy"] > 1.0
+    assert sr_standalone["noise1"] < 0.5
+    assert sr_standalone["noise2"] < 0.5
+
+    # The selectivity ratio does NOT separate two collinear features (the key
+    # limitation that the discriminator narrative rests on).
+    assert sr_standalone["driver"] == pytest.approx(sr_standalone["proxy"], rel=0.5)
+
+    # The advisory F-based critical value is attached and exceeded by the signal.
+    assert sr_standalone.attrs["conf_level"] == pytest.approx(0.95)
+    assert sr_standalone["driver"] > sr_standalone.attrs["f_critical"]
+    assert sr_standalone["noise1"] < sr_standalone.attrs["f_critical"]
+
+
+def test_target_projection_aligns_with_response() -> None:
+    """The TP scores reproduce the predictive direction (correlate ~1 with y)."""
+    X, y = _driver_proxy_noise()
+    Xs = MCUVScaler().fit_transform(X)
+    ys = MCUVScaler().fit_transform(y.to_frame())
+    pls = PLS(n_components=2).fit(Xs, ys)
+
+    tp = pls.target_projection(Xs)
+    assert tp.scores.equals(target_projection(pls, Xs).scores)  # bound and standalone agree
+    assert set(tp) >= {"scores", "loadings", "weights", "response"}
+    assert list(tp.loadings.index) == list(X.columns)
+    assert tp.weights.to_numpy() @ tp.weights.to_numpy() == pytest.approx(1.0)  # unit TP weight
+
+    corr = np.corrcoef(tp.scores.to_numpy(), ys.to_numpy().ravel())[0, 1]
+    assert abs(corr) > 0.95
+
+
+def test_selectivity_ratio_multi_response_and_errors(
+    fixture_pls_ldpe_example: dict[str, pd.DataFrame | np.ndarray | float | int],
+) -> None:
+    """Multi-response SR returns a feature-by-response frame; errors are guarded; LDPE smoke."""
+    # Multi-response: a feature-by-response DataFrame, with each column finite.
+    rng = np.random.default_rng(7)
+    X = pd.DataFrame(rng.standard_normal((40, 5)), columns=[f"x{i}" for i in range(5)])
+    beta = rng.standard_normal((5, 2))
+    Y = pd.DataFrame(X.values @ beta + 0.3 * rng.standard_normal((40, 2)), columns=["y0", "y1"])
+    Xs = MCUVScaler().fit_transform(X)
+    Ys = MCUVScaler().fit_transform(Y)
+    pls = PLS(n_components=3).fit(Xs, Ys)
+
+    sr_frame = pls.selectivity_ratio(Xs)
+    assert isinstance(sr_frame, pd.DataFrame)
+    assert list(sr_frame.columns) == ["y0", "y1"]
+    assert list(sr_frame.index) == list(X.columns)
+    assert np.isfinite(sr_frame.to_numpy()).all()
+
+    # Selecting one response gives a Series; an unknown response raises.
+    assert isinstance(pls.selectivity_ratio(Xs, response="y0"), pd.Series)
+    with pytest.raises(ValueError, match="response"):
+        pls.selectivity_ratio(Xs, response="not_a_response")
+    with pytest.raises(ValueError, match=r"not fitted|beta_coefficients_"):
+        selectivity_ratio(PLS(n_components=2), Xs)
+
+    # Real LDPE dataset (SIMCA reference data): SR is finite and non-negative.
+    data = fixture_pls_ldpe_example
+    X_ldpe = MCUVScaler().fit_transform(data["X"])
+    Y_ldpe = MCUVScaler().fit_transform(data["Y"])
+    pls_ldpe = PLS(n_components=data["A"]).fit(X_ldpe, Y_ldpe)
+    sr_values = pls_ldpe.selectivity_ratio(X_ldpe).to_numpy()
+    assert np.isfinite(sr_values).all()
+    assert (sr_values >= 0).all()
+
+
 def test_vip_formula_correctness() -> None:
     """VIP formula: verify against manually computed values."""
     # Simple synthetic case with known PLS model internals

From 0a8335b9c7f20a96b9cf63ef4813f62f972e44bd Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 30 Jun 2026 03:54:19 +0000
Subject: [PATCH 3/9] Add cross-validated discriminator to the observational
 relate step

discriminate_observational adds out-of-sample evidence on top of the
marginal associations: a per-attribute cross-validated Q-squared gate
(reusing PLS.select_n_components), a selectivity ratio per descriptor on
the target-projected predictive direction with a permutation p-value
(Benjamini-Hochberg corrected per attribute), and a collinear-cluster id
so proxies that ride on a driver are reported as one inseparable group.

The permutation loop is skipped for attributes the Q-squared gate finds
unpredictable. Surfaced as a 'discriminator' key on the relate output and
threaded through analyze_descriptive. Existing fast tests opt out via
discriminator=False.
---
 src/process_improve/sensory/__init__.py |   2 +
 src/process_improve/sensory/analysis.py | 236 +++++++++++++++++++++++-
 tests/test_sensory.py                   |  12 +-
 3 files changed, 240 insertions(+), 10 deletions(-)

diff --git a/src/process_improve/sensory/__init__.py b/src/process_improve/sensory/__init__.py
index ed8473ea..04922231 100644
--- a/src/process_improve/sensory/__init__.py
+++ b/src/process_improve/sensory/__init__.py
@@ -19,6 +19,7 @@
     AnalysisResult,
     aggregate_to_product,
     analyze_descriptive,
+    discriminate_observational,
     product_means,
     relate_designed,
     relate_observational,
@@ -42,6 +43,7 @@
     "align_scores",
     "analyze_descriptive",
     "apply_correction",
+    "discriminate_observational",
     "mixed_assessor_model",
     "panel_scorecard",
     "product_means",
diff --git a/src/process_improve/sensory/analysis.py b/src/process_improve/sensory/analysis.py
index 65e13dff..a1860f66 100644
--- a/src/process_improve/sensory/analysis.py
+++ b/src/process_improve/sensory/analysis.py
@@ -29,7 +29,7 @@
 import pandas as pd
 from scipy.stats import pearsonr
 
-from process_improve.multivariate.methods import PCA, PLS, vip
+from process_improve.multivariate.methods import PCA, PLS, MCUVScaler, selectivity_ratio, vip
 from process_improve.sensory.mam import MAMResult, align_scores, mixed_assessor_model
 from process_improve.sensory.panel import PanelScorecard, apply_correction, panel_scorecard
 from process_improve.sensory.validation import ValidationResult
@@ -109,6 +109,200 @@ def _attach_fdr(records: list[dict[str, Any]], alpha: float) -> list[dict[str, A
     return records
 
 
+def _collinear_clusters(x_block: pd.DataFrame, threshold: float) -> dict[str, int]:
+    """Group descriptors into clusters of mutually high absolute correlation.
+
+    Single-linkage connected components on the descriptor ``|corr|`` matrix:
+    two descriptors join the same cluster when their absolute Pearson
+    correlation is at least ``threshold``. Returns ``{descriptor: cluster_id}``
+    with cluster ids assigned in column order (a singleton descriptor gets its
+    own id). Collinear proxies therefore share an id, which is how the
+    discriminator reports that they cannot be told apart.
+    """
+    cols = list(x_block.columns)
+    n = len(cols)
+    corr = np.nan_to_num(x_block.corr().abs().to_numpy(), nan=0.0)
+    parent = list(range(n))
+
+    def find(i: int) -> int:
+        while parent[i] != i:
+            parent[i] = parent[parent[i]]
+            i = parent[i]
+        return i
+
+    for i in range(n):
+        for j in range(i + 1, n):
+            if corr[i, j] >= threshold:
+                ri, rj = find(i), find(j)
+                if ri != rj:
+                    parent[max(ri, rj)] = min(ri, rj)
+
+    roots: dict[int, int] = {}
+    cluster_of: dict[str, int] = {}
+    for i, name in enumerate(cols):
+        root = find(i)
+        if root not in roots:
+            roots[root] = len(roots)
+        cluster_of[str(name)] = roots[root]
+    return cluster_of
+
+
+def discriminate_observational(  # noqa: PLR0913
+    agg: pd.DataFrame,
+    covariates: pd.DataFrame,
+    *,
+    n_components: int = 2,
+    alpha: float = 0.05,
+    n_permutations: int = 199,
+    random_state: int = 0,
+    cluster_threshold: float = 0.95,
+    max_components_cv: int = 4,
+) -> dict[str, Any]:
+    """Cross-validated discriminator of which descriptors carry predictive signal.
+
+    The marginal associations (:func:`relate_observational`) flag every
+    descriptor that correlates with an attribute in-sample, genuine drivers and
+    proxies alike. This step adds out-of-sample evidence:
+
+    1. a per-attribute cross-validated Q-squared gate (is the attribute
+       predictable from the descriptor block at all),
+    2. a selectivity ratio per descriptor on the target-projected predictive
+       direction, with a permutation p-value (Benjamini-Hochberg corrected
+       across the whole family), so a descriptor that merely correlates by
+       chance but does not enter the predictive direction is demoted, and
+    3. a collinear-cluster id per descriptor.
+
+    What it cannot do is rank descriptors *within* a collinear cluster: two
+    descriptors that carry the same information predict equally well out of
+    sample, so they share a cluster id and both stay significant. Separating
+    them needs an external dataset or a designed experiment.
+
+    Parameters
+    ----------
+    agg : pandas.DataFrame
+        Product-by-attribute mean table (index ``product``).
+    covariates : pandas.DataFrame
+        One row per product with the measured descriptors (plus a ``product``
+        column, which is dropped here).
+    n_components : int
+        Latent components for the in-sample selectivity-ratio fit.
+    alpha : float
+        Target false-discovery rate for the permutation family.
+    n_permutations : int
+        Number of label permutations for the selectivity-ratio null.
+    random_state : int
+        Seed for the permutations and the cross-validation folds.
+    cluster_threshold : float
+        Absolute-correlation threshold for the collinear clustering.
+    max_components_cv : int
+        Cap on the component count the Q-squared gate may select.
+
+    Returns
+    -------
+    dict
+        ``per_attribute`` (the Q-squared gate per attribute), ``descriptors``
+        (the per attribute-descriptor selectivity ratio, permutation q-value,
+        ``discriminator_significant`` flag and ``cluster_id``), ``clusters``
+        (the descriptor-to-cluster map), and the settings used.
+    """
+    x_all = covariates.loc[agg.index]
+    descriptors = [c for c in x_all.columns if c != "product" and pd.api.types.is_numeric_dtype(x_all[c])]
+    x_block = x_all[descriptors].astype(float)
+    clusters = _collinear_clusters(x_block, cluster_threshold)
+
+    rng = np.random.default_rng(random_state)
+    per_attribute: list[dict[str, Any]] = []
+    records: list[dict[str, Any]] = []
+    for attr in agg.columns:
+        y = agg[attr].astype(float)
+        mask = y.notna()
+        x_attr = x_block.loc[mask]
+        y_attr = y.loc[mask]
+        n_rows = x_attr.shape[0]
+
+        # 1. Cross-validated Q-squared gate (raw inputs; the splitter scales inside folds).
+        cap = max(1, min(max_components_cv, x_attr.shape[1], n_rows - 2))
+        predictable = False
+        q2_cv = float("nan")
+        rmsep_cv = float("nan")
+        a = max(1, min(n_components, cap))
+        if n_rows >= 5 and y_attr.std() > 0:
+            sel = PLS.select_n_components(
+                x_attr,
+                y_attr,
+                max_components=cap,
+                cv=5,
+                n_repeats=3,
+                random_state=random_state,
+            )
+            a = int(sel.n_components)
+            q2_cv = float(sel.r2y_validated.loc[a, "total"])
+            rmsep_cv = float(sel.rmsecv.loc[a, "total"])
+            predictable = q2_cv > 0.0
+        per_attribute.append(
+            {
+                "attribute": str(attr),
+                "n_components_cv": a,
+                "q2_cv": q2_cv,
+                "rmsep_cv": rmsep_cv,
+                "predictable": bool(predictable),
+            }
+        )
+
+        # 2. Selectivity ratio on the predictive direction, with a permutation
+        #    null. The permutation loop is the costly part, so it is skipped for
+        #    attributes the Q-squared gate already found unpredictable: none of
+        #    their descriptors can be flagged anyway.
+        x_scaled = MCUVScaler().fit_transform(x_attr)
+        y_scaled = MCUVScaler().fit_transform(y_attr.to_frame())
+        pls = PLS(n_components=a).fit(x_scaled, y_scaled)
+        sr_obs = selectivity_ratio(pls, x_scaled).reindex(descriptors)
+        if predictable:
+            y_values = y_scaled.to_numpy().ravel()
+            ge_counts = np.zeros(len(descriptors))
+            for _ in range(n_permutations):
+                permuted = pd.DataFrame(
+                    y_values[rng.permutation(n_rows)], index=x_scaled.index, columns=y_scaled.columns
+                )
+                pls_p = PLS(n_components=a).fit(x_scaled, permuted)
+                sr_p = selectivity_ratio(pls_p, x_scaled).reindex(descriptors).to_numpy()
+                ge_counts += sr_p >= sr_obs.to_numpy()
+            perm_p = (ge_counts + 1.0) / (n_permutations + 1.0)
+        else:
+            perm_p = np.ones(len(descriptors))
+        for i, desc in enumerate(descriptors):
+            records.append(
+                {
+                    "attribute": str(attr),
+                    "descriptor": str(desc),
+                    "selectivity_ratio": float(sr_obs[desc]),
+                    "p_value": float(perm_p[i]),
+                    "cluster_id": clusters[str(desc)],
+                    "_predictable": bool(predictable),
+                }
+            )
+
+    # 3. Benjamini-Hochberg per attribute (each attribute is its own family of
+    #    tests), then gate the significance flag on the attribute being
+    #    predictable out of sample.
+    by_attribute: dict[str, list[dict[str, Any]]] = {}
+    for rec in records:
+        by_attribute.setdefault(rec["attribute"], []).append(rec)
+    for group in by_attribute.values():
+        _attach_fdr(group, alpha)
+    for rec in records:
+        rec["discriminator_significant"] = bool(rec.pop("significant") and rec.pop("_predictable"))
+
+    return {
+        "per_attribute": per_attribute,
+        "descriptors": records,
+        "clusters": clusters,
+        "alpha": alpha,
+        "n_permutations": n_permutations,
+        "cluster_threshold": cluster_threshold,
+    }
+
+
 def relate_designed(
     agg: pd.DataFrame,
     covariates: pd.DataFrame,
@@ -135,12 +329,15 @@ def relate_designed(
     )
 
 
-def relate_observational(
+def relate_observational(  # noqa: PLR0913
     agg: pd.DataFrame,
     covariates: pd.DataFrame,
     *,
     n_components: int = 2,
     alpha: float = 0.05,
+    discriminator: bool = True,
+    n_permutations: int = 199,
+    random_state: int = 0,
 ) -> dict[str, Any]:
     """Relate the attribute block to measured descriptors with PLS plus correlations."""
     x_block = covariates.loc[agg.index].astype(float)
@@ -166,13 +363,23 @@ def relate_observational(
                     {"attribute": str(attr), "descriptor": str(desc), "r": float(r), "p_value": float(p)}
                 )
     assoc = _attach_fdr(assoc, alpha)
-    return {
+    result: dict[str, Any] = {
         "mode": "observational",
         "n_components": max_comp,
         "alpha": alpha,
         "vip": drivers,
         "associations": assoc,
     }
+    if discriminator:
+        result["discriminator"] = discriminate_observational(
+            agg,
+            covariates.loc[agg.index],
+            n_components=max_comp,
+            alpha=alpha,
+            n_permutations=n_permutations,
+            random_state=random_state,
+        )
+    return result
 
 
 def product_means(panel: pd.DataFrame, conf_level: float = 0.95) -> pd.DataFrame:
@@ -213,6 +420,9 @@ def analyze_descriptive(  # noqa: PLR0913
     n_components: int = 2,
     conf_level: float = 0.95,
     alpha: float = 0.05,
+    discriminator: bool = True,
+    n_permutations: int = 199,
+    random_state: int = 0,
 ) -> AnalysisResult:
     """Run the descriptive pipeline: panel check, correction, and relate.
 
@@ -241,6 +451,13 @@ def analyze_descriptive(  # noqa: PLR0913
         Confidence level for the product-mean intervals.
     alpha : float
         Target false-discovery rate for the relate step.
+    discriminator : bool
+        Whether to run the cross-validated discriminator
+        (:func:`discriminate_observational`) in the observational relate step.
+    n_permutations : int
+        Permutations for the discriminator's selectivity-ratio null.
+    random_state : int
+        Seed for the discriminator's permutations and cross-validation folds.
 
     Returns
     -------
@@ -277,7 +494,15 @@ def analyze_descriptive(  # noqa: PLR0913
     if validated.mode == "designed":
         relate = relate_designed(agg, validated.covariates, model=model, alpha=alpha)
     else:
-        relate = relate_observational(agg, validated.covariates, n_components=n_components, alpha=alpha)
+        relate = relate_observational(
+            agg,
+            validated.covariates,
+            n_components=n_components,
+            alpha=alpha,
+            discriminator=discriminator,
+            n_permutations=n_permutations,
+            random_state=random_state,
+        )
 
     return AnalysisResult(
         mode=validated.mode,
@@ -295,6 +520,9 @@ def analyze_descriptive(  # noqa: PLR0913
             "n_components": n_components,
             "conf_level": conf_level,
             "alpha": alpha,
+            "discriminator": discriminator,
+            "n_permutations": n_permutations,
+            "random_state": random_state,
             "content_hash": validated.content_hash,
         },
     )
diff --git a/tests/test_sensory.py b/tests/test_sensory.py
index 91c9506e..672d75a7 100644
--- a/tests/test_sensory.py
+++ b/tests/test_sensory.py
@@ -181,8 +181,8 @@ def test_scorecard_clean_panel_has_no_flags():
 
 def test_dropping_panelist_changes_means():
     validated = validate_descriptive(_panel(), _obs(), mode="observational")
-    kept = analyze_descriptive(validated, drop_panelists=None)
-    dropped = analyze_descriptive(validated, drop_panelists="auto")
+    kept = analyze_descriptive(validated, drop_panelists=None, discriminator=False)
+    dropped = analyze_descriptive(validated, drop_panelists="auto", discriminator=False)
     assert "P8" in dropped.dropped
     assert kept.product_means.shape == dropped.product_means.shape
     merged = kept.product_means.merge(
@@ -208,7 +208,7 @@ def test_relate_designed_is_stub():
 
 def test_relate_observational_finds_descriptor():
     validated = validate_descriptive(_panel(), _obs(), mode="observational")
-    result = analyze_descriptive(validated)
+    result = analyze_descriptive(validated, discriminator=False)
     assoc = pd.DataFrame(result.relate["associations"])
     a_sodium = assoc[(assoc["attribute"] == "A") & (assoc["descriptor"] == "sodium")].iloc[0]
     a_fat = assoc[(assoc["attribute"] == "A") & (assoc["descriptor"] == "fat")].iloc[0]
@@ -219,7 +219,7 @@ def test_relate_observational_finds_descriptor():
 
 def test_relate_observational_q_values_monotone():
     validated = validate_descriptive(_panel(), _obs(), mode="observational")
-    result = analyze_descriptive(validated)
+    result = analyze_descriptive(validated, discriminator=False)
     assoc = pd.DataFrame(result.relate["associations"]).sort_values("p_value")
     assert np.all(np.diff(assoc["q_value"].to_numpy()) >= -1e-12)
 
@@ -316,8 +316,8 @@ def test_analyze_correction_align_changes_means_and_reports_mam():
     panel = _scaling_panel()
     obs = pd.DataFrame({"product": sorted(panel["product"].unique()), "d": range(panel["product"].nunique())})
     validated = validate_descriptive(panel, obs, mode="observational")
-    none = analyze_descriptive(validated, correction="none")
-    aligned = analyze_descriptive(validated, correction="align")
+    none = analyze_descriptive(validated, correction="none", discriminator=False)
+    aligned = analyze_descriptive(validated, correction="align", discriminator=False)
     assert aligned.correction == "align"
     assert not aligned.mam.scaling.empty
     merged = none.product_means.merge(

From a28c5a99472e21f39807da5bf6a2885d4973a342 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 30 Jun 2026 03:56:53 +0000
Subject: [PATCH 4/9] Expose the discriminator through the analyze tool; fix
 mypy types

Adds discriminator / n_permutations / random_state options to the
sensory_analyze_descriptive tool input and mentions the discriminator
output in the tool description. Passes a DataFrame to select_n_components
and indexes the selectivity ratio by position so mypy is satisfied.
---
 src/process_improve/sensory/analysis.py |  7 ++++---
 src/process_improve/sensory/tools.py    | 20 +++++++++++++++++++-
 tests/test_sensory.py                   |  1 +
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/process_improve/sensory/analysis.py b/src/process_improve/sensory/analysis.py
index a1860f66..5f7e70a3 100644
--- a/src/process_improve/sensory/analysis.py
+++ b/src/process_improve/sensory/analysis.py
@@ -229,7 +229,7 @@ def discriminate_observational(  # noqa: PLR0913
         if n_rows >= 5 and y_attr.std() > 0:
             sel = PLS.select_n_components(
                 x_attr,
-                y_attr,
+                y_attr.to_frame(),
                 max_components=cap,
                 cv=5,
                 n_repeats=3,
@@ -257,6 +257,7 @@ def discriminate_observational(  # noqa: PLR0913
         y_scaled = MCUVScaler().fit_transform(y_attr.to_frame())
         pls = PLS(n_components=a).fit(x_scaled, y_scaled)
         sr_obs = selectivity_ratio(pls, x_scaled).reindex(descriptors)
+        sr_values = sr_obs.to_numpy()
         if predictable:
             y_values = y_scaled.to_numpy().ravel()
             ge_counts = np.zeros(len(descriptors))
@@ -266,7 +267,7 @@ def discriminate_observational(  # noqa: PLR0913
                 )
                 pls_p = PLS(n_components=a).fit(x_scaled, permuted)
                 sr_p = selectivity_ratio(pls_p, x_scaled).reindex(descriptors).to_numpy()
-                ge_counts += sr_p >= sr_obs.to_numpy()
+                ge_counts += sr_p >= sr_values
             perm_p = (ge_counts + 1.0) / (n_permutations + 1.0)
         else:
             perm_p = np.ones(len(descriptors))
@@ -275,7 +276,7 @@ def discriminate_observational(  # noqa: PLR0913
                 {
                     "attribute": str(attr),
                     "descriptor": str(desc),
-                    "selectivity_ratio": float(sr_obs[desc]),
+                    "selectivity_ratio": float(sr_values[i]),
                     "p_value": float(perm_p[i]),
                     "cluster_id": clusters[str(desc)],
                     "_predictable": bool(predictable),
diff --git a/src/process_improve/sensory/tools.py b/src/process_improve/sensory/tools.py
index c8c53175..607fcd61 100644
--- a/src/process_improve/sensory/tools.py
+++ b/src/process_improve/sensory/tools.py
@@ -221,6 +221,18 @@ class _AnalyzeInput(BaseModel):
     n_components: int = Field(2, ge=1, description="Components for the PLS relate step and PCA map.")
     conf_level: float = Field(0.95, gt=0, lt=1, description="Confidence level for product-mean intervals.")
     alpha: float = Field(0.05, gt=0, lt=1, description="Target false-discovery rate for the relate step.")
+    discriminator: bool = Field(
+        True,
+        description=(
+            "Run the cross-validated discriminator: a per-attribute out-of-sample Q-squared gate, a "
+            "selectivity ratio per descriptor with a permutation test, and collinear-cluster grouping. "
+            "Set false to skip it (faster)."
+        ),
+    )
+    n_permutations: int = Field(
+        199, ge=1, description="Permutations for the discriminator's selectivity-ratio null."
+    )
+    random_state: int = Field(0, description="Seed for the discriminator's permutations and CV folds.")
     score_min: float | None = Field(None, description="Optional lower bound for the score scale.")
     score_max: float | None = Field(None, description="Optional upper bound for the score scale.")
 
@@ -233,7 +245,10 @@ class _AnalyzeInput(BaseModel):
         "attribute to the product. Observational mode relates attributes to measured descriptors with "
         "PLS and correlations (association). Returns the panel scorecard flags, dropped panelists, the "
         "MAM scaling coefficients and product F-tests, the relate results with Benjamini-Hochberg "
-        "q-values, product means with CIs, and a PCA map. Refuses to run if validation fails. "
+        "q-values, and (unless disabled) a cross-validated discriminator: a per-attribute Q-squared "
+        "gate, a selectivity ratio per descriptor with a permutation q-value, and collinear-cluster ids "
+        "that mark proxies which cannot be separated from a genuine driver. Also returns product means "
+        "with CIs and a PCA map. Refuses to run if validation fails. "
         "(Designed/DoE mode is planned for a later release.)"
     ),
     input_model=_AnalyzeInput,
@@ -261,6 +276,9 @@ def sensory_analyze_descriptive(spec: _AnalyzeInput) -> dict:
         n_components=spec.n_components,
         conf_level=spec.conf_level,
         alpha=spec.alpha,
+        discriminator=spec.discriminator,
+        n_permutations=spec.n_permutations,
+        random_state=spec.random_state,
     )
     scores = result.pca["scores"].reset_index().rename(columns={"index": "product"})
     return clean(
diff --git a/tests/test_sensory.py b/tests/test_sensory.py
index 672d75a7..d031f953 100644
--- a/tests/test_sensory.py
+++ b/tests/test_sensory.py
@@ -509,6 +509,7 @@ def test_tool_analyze_exposes_correction_and_mam():
         "covariates": [{"product": p, "d": i} for i, p in enumerate(products)],
         "mode": "observational",
         "correction": "align",
+        "discriminator": False,
     }
     out = execute_tool_call("sensory_analyze_descriptive", payload)
     assert out["ok"]

From 211ec6ba1d5b391de9a32c6804261f5a05be7e1c Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 30 Jun 2026 04:19:14 +0000
Subject: [PATCH 5/9] Use max-statistic permutation; extend end-to-end test
 with the discriminator

The per-descriptor permutation now controls multiplicity with a
Westfall-Young max-statistic null per attribute, so a single genuine
driver is detectable without the resolution loss of a per-test BH floor.
The Q-squared gate uses leave-one-out cross-validation (cheap and robust
to near-collinear blocks), and PLS fits step the component count down on
a singular block.

The end-to-end example gains three measurement-condition nuisances; the
test now asserts the Q-squared gate, the brix collinear cluster kept as
one inseparable group (Trap B), genuine lone drivers kept, and a chance
correlate (lab_humidity vs Liking) flagged by the marginal test but
demoted by the discriminator (Trap A).
---
 src/process_improve/sensory/analysis.py | 127 ++++++++++++++----------
 tests/test_sensory_end_to_end.py        |  70 ++++++++++++-
 2 files changed, 142 insertions(+), 55 deletions(-)

diff --git a/src/process_improve/sensory/analysis.py b/src/process_improve/sensory/analysis.py
index 5f7e70a3..7c86af55 100644
--- a/src/process_improve/sensory/analysis.py
+++ b/src/process_improve/sensory/analysis.py
@@ -22,6 +22,7 @@
 
 from __future__ import annotations
 
+import warnings
 from dataclasses import dataclass, field
 from typing import Any
 
@@ -109,6 +110,24 @@ def _attach_fdr(records: list[dict[str, Any]], alpha: float) -> list[dict[str, A
     return records
 
 
+def _fit_pls_safe(x: pd.DataFrame, y: pd.DataFrame, n_components: int) -> tuple[PLS | None, int]:
+    """Fit PLS, stepping the component count down on a near-collinear (singular) block.
+
+    Near-duplicate descriptor columns (a proxy that is almost an exact function
+    of a driver) can make the high-order PLS deflation singular. Retry with one
+    fewer component until the fit succeeds, returning ``(model, components_used)``
+    or ``(None, 0)`` if even a single component fails.
+    """
+    for k in range(max(1, n_components), 0, -1):
+        try:
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", RuntimeWarning)
+                return PLS(n_components=k).fit(x, y), k
+        except np.linalg.LinAlgError:  # noqa: PERF203 - retry on singular block; loop is a few iterations
+            continue
+    return None, 0
+
+
 def _collinear_clusters(x_block: pd.DataFrame, threshold: float) -> dict[str, int]:
     """Group descriptors into clusters of mutually high absolute correlation.
 
@@ -147,7 +166,7 @@ def find(i: int) -> int:
     return cluster_of
 
 
-def discriminate_observational(  # noqa: PLR0913
+def discriminate_observational(  # noqa: PLR0913, PLR0915
     agg: pd.DataFrame,
     covariates: pd.DataFrame,
     *,
@@ -219,30 +238,32 @@ def discriminate_observational(  # noqa: PLR0913
         x_attr = x_block.loc[mask]
         y_attr = y.loc[mask]
         n_rows = x_attr.shape[0]
-
-        # 1. Cross-validated Q-squared gate (raw inputs; the splitter scales inside folds).
         cap = max(1, min(max_components_cv, x_attr.shape[1], n_rows - 2))
+        a = max(1, min(n_components, cap))
+
+        # Fit one PLS for this attribute, reused for the Q-squared gate and the
+        # selectivity ratio. ``_fit_pls_safe`` steps the component count down if
+        # the near-collinear descriptor block makes the fit singular.
+        x_scaled = MCUVScaler().fit_transform(x_attr)
+        y_scaled = MCUVScaler().fit_transform(y_attr.to_frame())
+        pls, a = _fit_pls_safe(x_scaled, y_scaled, a)
+
+        # 1. Leave-one-out cross-validated Q-squared gate: is the attribute
+        #    predictable from the descriptor block out of sample?
         predictable = False
         q2_cv = float("nan")
         rmsep_cv = float("nan")
-        a = max(1, min(n_components, cap))
-        if n_rows >= 5 and y_attr.std() > 0:
-            sel = PLS.select_n_components(
-                x_attr,
-                y_attr.to_frame(),
-                max_components=cap,
-                cv=5,
-                n_repeats=3,
-                random_state=random_state,
-            )
-            a = int(sel.n_components)
-            q2_cv = float(sel.r2y_validated.loc[a, "total"])
-            rmsep_cv = float(sel.rmsecv.loc[a, "total"])
+        if pls is not None and n_rows >= 5 and y_attr.std() > 0:
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", RuntimeWarning)
+                cv = pls.cross_validate(x_scaled, y_scaled, cv="loo", show_progress=False)
+            q2_cv = float(cv.q_squared.iloc[0])
+            rmsep_cv = float(cv.rmse_cv.iloc[0])
             predictable = q2_cv > 0.0
         per_attribute.append(
             {
                 "attribute": str(attr),
-                "n_components_cv": a,
+                "n_components_cv": a if pls is not None else 0,
                 "q2_cv": q2_cv,
                 "rmsep_cv": rmsep_cv,
                 "predictable": bool(predictable),
@@ -250,50 +271,56 @@ def discriminate_observational(  # noqa: PLR0913
         )
 
         # 2. Selectivity ratio on the predictive direction, with a permutation
-        #    null. The permutation loop is the costly part, so it is skipped for
-        #    attributes the Q-squared gate already found unpredictable: none of
-        #    their descriptors can be flagged anyway.
-        x_scaled = MCUVScaler().fit_transform(x_attr)
-        y_scaled = MCUVScaler().fit_transform(y_attr.to_frame())
-        pls = PLS(n_components=a).fit(x_scaled, y_scaled)
-        sr_obs = selectivity_ratio(pls, x_scaled).reindex(descriptors)
-        sr_values = sr_obs.to_numpy()
-        if predictable:
-            y_values = y_scaled.to_numpy().ravel()
-            ge_counts = np.zeros(len(descriptors))
-            for _ in range(n_permutations):
-                permuted = pd.DataFrame(
-                    y_values[rng.permutation(n_rows)], index=x_scaled.index, columns=y_scaled.columns
-                )
-                pls_p = PLS(n_components=a).fit(x_scaled, permuted)
-                sr_p = selectivity_ratio(pls_p, x_scaled).reindex(descriptors).to_numpy()
-                ge_counts += sr_p >= sr_values
-            perm_p = (ge_counts + 1.0) / (n_permutations + 1.0)
+        #    null. Multiplicity across descriptors is controlled by the
+        #    max-statistic (Westfall-Young) permutation: for each label
+        #    permutation, the *largest* selectivity ratio over all descriptors
+        #    forms the null. An observed SR above that null is significant after
+        #    correction, so even a single genuine driver is detectable without
+        #    the resolution loss of a per-test Benjamini-Hochberg floor. The
+        #    permutation loop is skipped for attributes the Q-squared gate found
+        #    unpredictable: none of their descriptors can be flagged anyway.
+        k = len(descriptors)
+        if pls is None:  # degenerate block: nothing to relate for this attribute
+            sr_values = np.zeros(k)
+            p_raw = np.ones(k)
+            p_maxt = np.ones(k)
         else:
-            perm_p = np.ones(len(descriptors))
+            sr_values = np.asarray(selectivity_ratio(pls, x_scaled).reindex(descriptors), dtype=float)
+            if predictable:
+                y_values = y_scaled.to_numpy().ravel()
+                ge_each = np.zeros(k)  # per-descriptor null exceedances
+                ge_max = np.zeros(k)  # exceedances of the family-wide max null
+                done = 0
+                for _ in range(n_permutations):
+                    permuted = pd.DataFrame(
+                        y_values[rng.permutation(n_rows)], index=x_scaled.index, columns=y_scaled.columns
+                    )
+                    pls_p, _ = _fit_pls_safe(x_scaled, permuted, a)
+                    if pls_p is None:
+                        continue  # a degenerate permutation contributes nothing to the null
+                    sr_p = selectivity_ratio(pls_p, x_scaled).reindex(descriptors).to_numpy()
+                    ge_each += sr_p >= sr_values
+                    ge_max += np.nanmax(sr_p) >= sr_values
+                    done += 1
+                denom = done + 1.0
+                p_raw = (ge_each + 1.0) / denom
+                p_maxt = (ge_max + 1.0) / denom
+            else:
+                p_raw = np.ones(k)
+                p_maxt = np.ones(k)
         for i, desc in enumerate(descriptors):
             records.append(
                 {
                     "attribute": str(attr),
                     "descriptor": str(desc),
                     "selectivity_ratio": float(sr_values[i]),
-                    "p_value": float(perm_p[i]),
+                    "p_value": float(p_raw[i]),
+                    "q_value": float(p_maxt[i]),
+                    "discriminator_significant": bool(p_maxt[i] <= alpha and predictable),
                     "cluster_id": clusters[str(desc)],
-                    "_predictable": bool(predictable),
                 }
             )
 
-    # 3. Benjamini-Hochberg per attribute (each attribute is its own family of
-    #    tests), then gate the significance flag on the attribute being
-    #    predictable out of sample.
-    by_attribute: dict[str, list[dict[str, Any]]] = {}
-    for rec in records:
-        by_attribute.setdefault(rec["attribute"], []).append(rec)
-    for group in by_attribute.values():
-        _attach_fdr(group, alpha)
-    for rec in records:
-        rec["discriminator_significant"] = bool(rec.pop("significant") and rec.pop("_predictable"))
-
     return {
         "per_attribute": per_attribute,
         "descriptors": records,
diff --git a/tests/test_sensory_end_to_end.py b/tests/test_sensory_end_to_end.py
index cdaa4f71..9326b477 100644
--- a/tests/test_sensory_end_to_end.py
+++ b/tests/test_sensory_end_to_end.py
@@ -2,14 +2,19 @@
 
 A single synthetic scenario drives both this test and the documentation worked
 example, so the two stay in agreement. The data is entirely synthetic and
-generic: ten assessors score six products on nine attributes (wide layout, with
-a nuisance ``site`` column and a few missing cells), and a per-product table of
-instrumental covariates carries both genuine mechanistic correlates and
-spurious proxies/artifacts. Three assessors are planted to misbehave:
+generic: ten assessors score eighteen products on nine attributes (wide layout,
+with a nuisance ``site`` column and a few missing cells), and a per-product
+table of instrumental covariates carries genuine mechanistic correlates,
+spurious proxies that ride on a driver, and unrelated measurement-condition
+nuisances. Three assessors are planted to misbehave:
 
 * ``J07`` scores at random (disagrees with the panel),
 * ``J03`` rates everything high (a location shift),
 * ``J09`` uses only the middle of the scale (a compressed range).
+
+The covariate families exercise the cross-validated discriminator: it keeps the
+genuine drivers, reports the collinear proxies as one inseparable cluster, and
+demotes a nuisance that correlates with an attribute only by chance.
 """
 
 from __future__ import annotations
@@ -103,6 +108,15 @@ def make_panel_and_covariates(seed: int = 0) -> tuple[pd.DataFrame, pd.DataFrame
         }
     )
 
+    # Three unrelated measurement-condition nuisances (no causal link to any
+    # attribute). A dedicated RNG seed is chosen so one of them (lab_humidity)
+    # lands a chance correlation with an attribute in this small sample, which
+    # the marginal test flags but the cross-validated discriminator demotes.
+    noise_rng = np.random.default_rng(16)
+    covariates["headspace_volume"] = _sig(noise_rng.uniform(5.0, 25.0, n), 3)  # mL
+    covariates["sample_mass"] = _sig(noise_rng.uniform(180.0, 260.0, n), 3)  # g
+    covariates["lab_humidity"] = _sig(noise_rng.uniform(30.0, 70.0, n), 3)  # %RH
+
     # Panel: integer 0-10 scores, with three planted atypical assessors.
     offset = dict(zip(ASSESSORS, rng.normal(0, 0.3, len(ASSESSORS)), strict=True))
     rows = []
@@ -140,7 +154,7 @@ def _assoc_row(assoc: pd.DataFrame, attribute: str, descriptor: str) -> pd.Serie
     return assoc[(assoc["attribute"] == attribute) & (assoc["descriptor"] == descriptor)].iloc[0]
 
 
-def test_pipeline_end_to_end():
+def test_pipeline_end_to_end():  # noqa: PLR0915
     panel, covariates = make_panel_and_covariates()
 
     # Step 1: reshape wide -> long, ignoring the nuisance 'site' column.
@@ -197,6 +211,52 @@ def test_pipeline_end_to_end():
     assert set(assoc["attribute"]) == set(ATTRIBUTES)
     assert _assoc_row(assoc, "Bitterness", "polyphenols")["significant"]
 
+    # Step 5: the cross-validated discriminator separates predictive structure
+    # from in-sample coincidence.
+    disc = result.relate["discriminator"]
+    gate = pd.DataFrame(disc["per_attribute"]).set_index("attribute")
+    desc = pd.DataFrame(disc["descriptors"])
+    clusters = disc["clusters"]
+
+    def _disc(attribute: str, descriptor: str) -> bool:
+        row = desc[(desc["attribute"] == attribute) & (desc["descriptor"] == descriptor)].iloc[0]
+        return bool(row["discriminator_significant"])
+
+    # Q-squared gate: attributes with a real driver are predictable out of
+    # sample; attributes with none are not.
+    assert gate.loc["Sweetness", "q2_cv"] > 0.8
+    assert gate.loc["Liking", "q2_cv"] > 0.8
+    assert bool(gate.loc["Sweetness", "predictable"])
+    assert not bool(gate.loc["Juiciness", "predictable"])  # no covariate drives it
+
+    # Trap B: the proxies that ride on brix cannot be separated from it. They
+    # share one collinear cluster and all stay significant for Sweetness.
+    assert clusters["brix"] == clusters["refractive_index"] == clusters["specific_gravity"]
+    assert _disc("Sweetness", "brix")
+    assert _disc("Sweetness", "refractive_index")
+    assert _disc("Sweetness", "specific_gravity")
+
+    # Genuine lone drivers survive (and sit in their own clusters).
+    assert _disc("Sourness", "titratable_acidity")
+    assert _disc("Bitterness", "polyphenols")
+    assert clusters["titratable_acidity"] != clusters["brix"]
+
+    # Trap A: a nuisance that correlates with an attribute only by chance is
+    # flagged by the marginal test but demoted by the discriminator, while the
+    # genuine drivers of that same attribute survive.
+    assert _assoc_row(assoc, "Liking", "lab_humidity")["significant"]
+    assert not _disc("Liking", "lab_humidity")
+    assert _disc("Liking", "brix")
+    assert _disc("Liking", "price")
+
+    # No measurement-condition nuisance is ever discriminator-significant, and
+    # the discriminator flags strictly fewer pairs than the marginal test.
+    nuisances = {"headspace_volume", "sample_mass", "lab_humidity", "serving_temperature"}
+    assert not desc[desc["descriptor"].isin(nuisances)]["discriminator_significant"].any()
+    marginal_pairs = set(map(tuple, assoc[assoc["significant"]][["attribute", "descriptor"]].to_numpy()))
+    disc_pairs = set(map(tuple, desc[desc["discriminator_significant"]][["attribute", "descriptor"]].to_numpy()))
+    assert disc_pairs < marginal_pairs  # strict subset: the discriminator narrows the findings
+
 
 def test_means_only_table_is_refused():
     # A product-by-attribute summary (no panelist column) cannot drive the MAM.

From af9b5e83674fdf9b5d827e7e1e5aedcee2c9abb5 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 30 Jun 2026 04:23:25 +0000
Subject: [PATCH 6/9] Reframe the tutorial around the discriminator and
 collinearity honesty

Adds a 'Telling genuine drivers from proxies' section explaining the
cross-validated Q-squared gate, the selectivity ratio on the
target-projected direction, and collinear clustering, and a Step 5 in the
worked example. States plainly that observational cross-validation demotes
coincidences and groups proxies with their driver but cannot rank
descriptors within a collinear cluster: that needs an external dataset, a
designed experiment, or mechanistic knowledge.
---
 docs/user_guide/sensory_panel.rst | 115 ++++++++++++++++++++++++------
 1 file changed, 95 insertions(+), 20 deletions(-)

diff --git a/docs/user_guide/sensory_panel.rst b/docs/user_guide/sensory_panel.rst
index 1c69d235..422cdcf5 100644
--- a/docs/user_guide/sensory_panel.rst
+++ b/docs/user_guide/sensory_panel.rst
@@ -17,7 +17,9 @@ kind of product. The flow is:
 1. **validate** the panel data and a product-covariate table;
 2. **check the panel**, then either correct each panelist's scale use (the
    Mixed Assessor Model) or drop anomalous panelists;
-3. **relate** each attribute back to the product.
+3. **relate** each attribute back to the product, and **discriminate** the
+   descriptors that carry out-of-sample predictive signal from those that only
+   correlate in this one sample.
 
 The worked example near the end runs all of these on a synthetic dataset.
 
@@ -148,6 +150,8 @@ The observational relate output is:
 - ``result.relate["associations"]`` gives the per-(attribute, descriptor)
   correlation with a raw p-value, a Benjamini-Hochberg ``q_value`` across the
   whole family of tests, and a ``significant`` flag.
+- ``result.relate["discriminator"]`` adds the out-of-sample evidence described
+  in the next section.
 
 The result also carries supporting context: ``result.product_means`` (each
 product-by-attribute mean with a confidence interval) and ``result.pca`` (a PCA
@@ -156,6 +160,45 @@ sensory map of the products over the attributes).
 The designed-mode relate (factor *effects* rather than associations) is planned;
 ``mode="designed"`` raises ``NotImplementedError`` for now.
 
+Telling genuine drivers from proxies: the discriminator
+-------------------------------------------------------
+
+The ``associations`` table reports *marginal* correlations: each
+attribute-descriptor pair on its own. A pair can be significant for three
+different reasons, which a marginal correlation cannot tell apart: the
+descriptor genuinely drives the attribute; the descriptor only rides on a
+genuine driver (a proxy); or the descriptor happens to line up with the
+attribute in this particular set of products (a coincidence). The
+``discriminator`` adds out-of-sample evidence to separate these:
+
+- a per-attribute **cross-validated** :math:`Q^2` (leave-one-out): is the
+  attribute predictable from the descriptor block at all, on products the model
+  did not see? ``result.relate["discriminator"]["per_attribute"]`` reports
+  ``q2_cv`` and a ``predictable`` flag. A coincidence does not predict held-out
+  products, so its attribute often fails this gate.
+- a **selectivity ratio** per descriptor, computed on the *target-projected*
+  predictive direction (the single PLS component aligned with the attribute).
+  The selectivity ratio is the share of a descriptor's variance that the
+  predictive direction explains; unlike the marginal correlation it is judged
+  *given the other descriptors*, so a descriptor that adds nothing beyond the
+  real drivers scores low. Significance is assessed with a permutation test that
+  controls for testing many descriptors at once (a max-statistic null), gated on
+  the attribute being predictable. ``result.relate["discriminator"]["descriptors"]``
+  reports ``selectivity_ratio``, a permutation ``q_value`` and a
+  ``discriminator_significant`` flag.
+- a **collinear cluster id** per descriptor (``cluster_id``): descriptors whose
+  absolute correlation exceeds a threshold are grouped. Two descriptors in the
+  same cluster carry the same information, so they predict equally well and the
+  discriminator keeps them both. This is the honest limit of an observational
+  analysis: it can report that a group of descriptors is predictive, but it
+  cannot say *which one* inside a collinear cluster is the cause. Separating
+  them needs an external dataset that breaks the collinearity, a designed
+  experiment, or mechanistic knowledge.
+
+So the discriminator demotes coincidences (they fail the gate or the
+selectivity-ratio test) and groups proxies with their driver, but it does not,
+and cannot, rank descriptors within a collinear cluster.
+
 Correcting the panel: the Mixed Assessor Model
 ----------------------------------------------
 
@@ -205,13 +248,18 @@ intensity, Sweetness, Sourness, Bitterness, Firmness, Juiciness, Colour
 intensity, Aftertaste, Liking), as integers on a 0-10 scale. The scores came out
 of the scoring software in the wide layout, with an extra ``site`` column we do
 not need and a few missing cells. We also have instrumental measurements per
-product, in realistic physical units, split on purpose into two kinds: genuine
-mechanistic correlates (``brix`` for sweetness, ``titratable_acidity`` for
-sourness, ``polyphenols`` for bitterness, ``aroma_oav`` for aroma, ``viscosity``
-for firmness) and spurious proxies or artifacts (``refractive_index`` and
-``specific_gravity`` ride on ``brix``; ``conductivity`` rides on the acid ions;
-``total_dissolved_solids`` is an aggregate; ``price`` tracks Liking only through
-the sample frame; ``serving_temperature`` is unrelated).
+product, in realistic physical units, split on purpose into three kinds:
+
+- genuine mechanistic correlates: ``brix`` for sweetness, ``titratable_acidity``
+  for sourness, ``polyphenols`` for bitterness, ``aroma_oav`` for aroma,
+  ``viscosity`` for firmness;
+- proxies that ride on a genuine driver: ``refractive_index`` and
+  ``specific_gravity`` rise with dissolved sugar (``brix``); ``conductivity``
+  rides on the acid ions; ``total_dissolved_solids`` is an aggregate; ``price``
+  tracks Liking through the sample frame;
+- unrelated measurement-condition nuisances with no causal link to any
+  attribute: ``serving_temperature``, ``headspace_volume``, ``sample_mass`` and
+  ``lab_humidity``.
 
 Three assessors were constructed to misbehave: J07 scores at random, J03 rates
 everything high, and J09 uses only the middle of the scale.
@@ -268,17 +316,43 @@ measurements.
    assoc = pd.DataFrame(result.relate["associations"])
    print(assoc[assoc["significant"]])
 
-The genuine correlates are significant (``brix`` with Sweetness,
-``titratable_acidity`` with Sourness, ``price`` with Liking, each with a
-Benjamini-Hochberg q-value), but so are the spurious proxies (``refractive_index``
-and ``specific_gravity`` with Sweetness). Both of those ride on ``brix`` by
-construction (refractive index and specific gravity rise with dissolved sugar,
-not with perceived sweetness), so within this one dataset they track Sweetness
-just as strongly as ``brix`` itself.
-This is the trap to watch: a within-sample correlation is not a transferable,
-causal link. Telling a genuine correlate apart from a proxy that merely rides
-on it needs out-of-sample evidence (cross-validated Q-squared, a Van der Voet
-test, or a selectivity ratio), which is covered separately.
+The marginal test flags sixteen pairs. The genuine correlates are there
+(``brix`` with Sweetness, ``titratable_acidity`` with Sourness, ``price`` with
+Liking), but so are the proxies (``refractive_index`` and ``specific_gravity``
+with Sweetness, which ride on ``brix``) and even a coincidence: ``lab_humidity``
+correlates with Liking (r about -0.67) purely by chance in these eighteen
+products. A within-sample correlation on its own cannot tell these three cases
+apart.
+
+**Step 5, discriminate.** The discriminator adds the out-of-sample evidence.
+
+.. code-block:: python
+
+   disc = result.relate["discriminator"]
+   gate = pd.DataFrame(disc["per_attribute"])      # cross-validated Q-squared per attribute
+   drivers = pd.DataFrame(disc["descriptors"])     # selectivity ratio, q-value, cluster id
+   print(drivers[drivers["discriminator_significant"]])
+
+It narrows the sixteen marginal hits to twelve:
+
+- The cross-validated :math:`Q^2` gate keeps Sweetness (``q2_cv`` about 0.95) and
+  Liking (about 0.94) but rejects Juiciness, Colour intensity and Aftertaste,
+  for which no covariate predicts held-out products.
+- ``lab_humidity`` is demoted: although it correlates with Liking in-sample, it
+  carries no predictive selectivity, so the permutation test does not keep it,
+  while the genuine ``brix`` and ``price`` for Liking remain significant. This is
+  the coincidence caught.
+- ``brix``, ``refractive_index`` and ``specific_gravity`` all stay significant
+  for Sweetness and share one ``cluster_id``. The discriminator reports them as a
+  single inseparable group: from these data you cannot say which of the three is
+  the cause, because each carries the same information.
+
+This is the trap, stated precisely: a within-sample correlation is not a
+transferable, causal link. Cross-validation demotes coincidences, and the
+selectivity ratio plus clustering groups proxies with their driver, but ranking
+descriptors *within* a collinear cluster needs evidence this single panel cannot
+supply: an external dataset that breaks the collinearity, a designed experiment,
+or mechanistic knowledge.
 
 The full runnable scenario is in the test suite
 (``tests/test_sensory_end_to_end.py``).
@@ -297,7 +371,8 @@ and covariate tables as lists of row-records and returning JSON:
   the scorecard with flags, the MAM scaling coefficients and F-tests, and,
   with ``align=true``, the rescaled panel.
 - ``sensory_analyze_descriptive`` - the full pipeline, with a ``correction``
-  option (``"none"`` / ``"align"`` / ``"drop"``) and the MAM results in its
+  option (``"none"`` / ``"align"`` / ``"drop"``), the MAM results, and (unless
+  ``discriminator`` is set false) the cross-validated discriminator in its
   output.
 
 The analyze tool validates first and refuses to run if validation fails, so an

From 8c4496c1e398bc9594e99173cd449246dc8be27e Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 30 Jun 2026 04:29:00 +0000
Subject: [PATCH 7/9] Bump to 1.50.0; cover discriminator and diagnostic error
 paths

Version 1.50.0 (new feature: PLS target projection / selectivity ratio and
the cross-validated sensory discriminator), with CITATION and CHANGELOG in
sync. Adds unit tests for the diagnostic error paths (response selection,
unfitted models, the small-sample F-critical) and for the discriminator's
Q-squared gate and collinear clustering.
---
 CHANGELOG.md               | 22 ++++++++++++++++++++-
 CITATION.cff               |  4 ++--
 pyproject.toml             |  2 +-
 tests/test_multivariate.py | 18 ++++++++++++++++-
 tests/test_sensory.py      | 40 +++++++++++++++++++++++++++++++++++++-
 5 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 62bdbcef..b02ffbf8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,25 @@ those changes.
 
 ## [Unreleased]
 
+## [1.50.0] - 2026-06-30
+
+### Added
+
+- `target_projection` and `selectivity_ratio` PLS diagnostics (in
+  `process_improve.multivariate`, bound as PLS convenience methods): the
+  target-projected predictive component (Kvalheim and Karstang, 1989) and each
+  feature's explained-to-residual variance ratio on it (Rajalahti et al., 2009),
+  a better-behaved alternative to VIP for ranking predictive relevance.
+- A cross-validated discriminator for the observational relate step
+  (`discriminate_observational`, surfaced as `result.relate["discriminator"]`
+  and through the `sensory_analyze_descriptive` tool): a per-attribute
+  leave-one-out Q-squared gate, a selectivity ratio per descriptor with a
+  max-statistic permutation test, and collinear-cluster grouping. It demotes
+  descriptors that only correlate in-sample and reports proxies that ride on a
+  driver as one inseparable cluster, while stating that ranking within a
+  collinear cluster needs an external dataset or a designed experiment. The
+  user-guide tutorial is reframed around this.
+
 ## [1.49.1] - 2026-06-29
 
 ### Added
@@ -2263,7 +2282,8 @@ this entry records them together.
 - Reworked the README with a sharper value proposition and a
   "Why not scikit-learn?" comparison table.
 
-[Unreleased]: https://github.com/kgdunn/process-improve/compare/v1.49.1...HEAD
+[Unreleased]: https://github.com/kgdunn/process-improve/compare/v1.50.0...HEAD
+[1.50.0]: https://github.com/kgdunn/process-improve/compare/v1.49.1...v1.50.0
 [1.49.1]: https://github.com/kgdunn/process-improve/compare/v1.49.0...v1.49.1
 [1.49.0]: https://github.com/kgdunn/process-improve/compare/v1.48.0...v1.49.0
 [1.48.0]: https://github.com/kgdunn/process-improve/compare/v1.47.0...v1.48.0
diff --git a/CITATION.cff b/CITATION.cff
index 012d360b..86a9ab10 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -12,8 +12,8 @@ authors:
 repository-code: "https://github.com/kgdunn/process-improve"
 url: "https://kgdunn.github.io/process-improve/"
 license: MIT
-version: 1.49.1
-date-released: "2026-06-29"
+version: 1.50.0
+date-released: "2026-06-30"
 keywords:
   - chemometrics
   - multivariate analysis
diff --git a/pyproject.toml b/pyproject.toml
index 1878a76d..b75bc235 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "process-improve"
-version = "1.49.1"
+version = "1.50.0"
 description = 'Designed Experiments; Latent Variables (PCA, PLS, multivariate methods with missing data); Process Monitoring; Batch data analysis.'
 readme = "README.md"
 license = "MIT"
diff --git a/tests/test_multivariate.py b/tests/test_multivariate.py
index bc9eff39..abe9767f 100644
--- a/tests/test_multivariate.py
+++ b/tests/test_multivariate.py
@@ -2313,13 +2313,29 @@ def test_selectivity_ratio_multi_response_and_errors(
     assert list(sr_frame.index) == list(X.columns)
     assert np.isfinite(sr_frame.to_numpy()).all()
 
-    # Selecting one response gives a Series; an unknown response raises.
+    # Selecting one response by name or integer position gives a Series.
     assert isinstance(pls.selectivity_ratio(Xs, response="y0"), pd.Series)
+    assert isinstance(pls.selectivity_ratio(Xs, response=1), pd.Series)
     with pytest.raises(ValueError, match="response"):
         pls.selectivity_ratio(Xs, response="not_a_response")
+    with pytest.raises(ValueError, match="out of range"):
+        pls.selectivity_ratio(Xs, response=9)
     with pytest.raises(ValueError, match=r"not fitted|beta_coefficients_"):
         selectivity_ratio(PLS(n_components=2), Xs)
 
+    # target_projection on a multi-response model needs an explicit response,
+    # and refuses an unfitted model.
+    with pytest.raises(ValueError, match="several responses"):
+        target_projection(pls, Xs)
+    with pytest.raises(ValueError, match=r"not fitted|beta_coefficients_"):
+        target_projection(PLS(n_components=2), Xs)
+
+    # With only three samples the F-based critical value is undefined (NaN).
+    tiny = MCUVScaler().fit_transform(X.iloc[:3])
+    tiny_y = MCUVScaler().fit_transform(Y.iloc[:3, [0]])
+    sr_tiny = PLS(n_components=1).fit(tiny, tiny_y).selectivity_ratio(tiny)
+    assert np.isnan(sr_tiny.attrs["f_critical"])
+
     # Real LDPE dataset (SIMCA reference data): SR is finite and non-negative.
     data = fixture_pls_ldpe_example
     X_ldpe = MCUVScaler().fit_transform(data["X"])
diff --git a/tests/test_sensory.py b/tests/test_sensory.py
index d031f953..2375d965 100644
--- a/tests/test_sensory.py
+++ b/tests/test_sensory.py
@@ -18,7 +18,7 @@
     panel_scorecard,
     validate_descriptive,
 )
-from process_improve.sensory.analysis import relate_designed
+from process_improve.sensory.analysis import _collinear_clusters, discriminate_observational, relate_designed
 from process_improve.sensory.ingest import reshape_to_long
 from process_improve.univariate.metrics import benjamini_hochberg
 
@@ -224,6 +224,44 @@ def test_relate_observational_q_values_monotone():
     assert np.all(np.diff(assoc["q_value"].to_numpy()) >= -1e-12)
 
 
+def test_collinear_clusters_groups_correlated_descriptors():
+    rng = np.random.default_rng(0)
+    base = rng.standard_normal(20)
+    block = pd.DataFrame(
+        {"a": base, "b": base + 0.001 * rng.standard_normal(20), "c": rng.standard_normal(20)}
+    )
+    clusters = _collinear_clusters(block, threshold=0.95)
+    assert clusters["a"] == clusters["b"]  # near-identical columns group together
+    assert clusters["c"] != clusters["a"]  # an independent column is its own cluster
+
+
+def test_discriminator_gate_and_clusters():
+    products = [f"P{i}" for i in range(9)]
+    rng = np.random.default_rng(3)
+    u = np.linspace(0.0, 1.0, 9) + rng.normal(0, 0.02, 9)
+    agg = pd.DataFrame(
+        {"A": 2.0 * u + rng.normal(0, 0.05, 9), "B": rng.normal(0, 1, 9)}, index=products
+    )
+    cov = pd.DataFrame(
+        {"d1": u, "d2": u + 0.005 * rng.normal(0, 1, 9), "d3": rng.normal(0, 1, 9)}, index=products
+    )
+    disc = discriminate_observational(agg, cov, n_components=1, n_permutations=49, random_state=0)
+
+    # The collinear pair shares a cluster; the noise descriptor does not.
+    assert disc["clusters"]["d1"] == disc["clusters"]["d2"] != disc["clusters"]["d3"]
+
+    gate = pd.DataFrame(disc["per_attribute"]).set_index("attribute")
+    assert bool(gate.loc["A", "predictable"])  # A is driven by d1/d2
+    assert not bool(gate.loc["B", "predictable"])  # B is noise, not predictable
+
+    desc = pd.DataFrame(disc["descriptors"])
+    assert set(desc.columns) >= {"selectivity_ratio", "p_value", "q_value", "discriminator_significant"}
+    # Nothing is flagged for the unpredictable attribute, and the noise
+    # descriptor is never flagged.
+    assert not desc[desc["attribute"] == "B"]["discriminator_significant"].any()
+    assert not desc[desc["descriptor"] == "d3"]["discriminator_significant"].any()
+
+
 def test_relate_observational_requires_numeric_descriptors():
     obs = pd.DataFrame({"product": PRODUCTS, "grade": list("AABBCCA")})
     result = validate_descriptive(_panel(), obs, mode="observational")

From d1a788430a0c19eb8e4afcdc22a065a748950a9d Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 30 Jun 2026 05:54:51 +0000
Subject: [PATCH 8/9] Name the discriminator's diagnostics and cite their
 sources in the tutorial

Add an 'under the hood' note pointing at target_projection and
selectivity_ratio with their one-line formulas, plus a References block
(Kvalheim and Karstang 1989; Rajalahti et al. 2009), so the worked example
is self-contained.
---
 docs/user_guide/sensory_panel.rst | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/docs/user_guide/sensory_panel.rst b/docs/user_guide/sensory_panel.rst
index 422cdcf5..0a5b4ca4 100644
--- a/docs/user_guide/sensory_panel.rst
+++ b/docs/user_guide/sensory_panel.rst
@@ -199,6 +199,29 @@ So the discriminator demotes coincidences (they fail the gate or the
 selectivity-ratio test) and groups proxies with their driver, but it does not,
 and cannot, rank descriptors within a collinear cluster.
 
+Under the hood, the two predictive-importance steps are the standalone
+diagnostics :func:`~process_improve.multivariate.target_projection` and
+:func:`~process_improve.multivariate.selectivity_ratio` (also available as PLS
+methods). Target projection rotates the fitted PLS so that one component lies
+along the regression vector :math:`b` for the chosen attribute: the weight is
+:math:`w_{\text{TP}} = b / \lVert b \rVert` and the scores are
+:math:`t_{\text{TP}} = X\, w_{\text{TP}}`. The selectivity ratio of descriptor
+:math:`j` is the ratio of its explained to its residual sum of squares on that
+single component,
+:math:`\text{SR}_j = \text{SS}_{\text{explained},j} / \text{SS}_{\text{residual},j}`,
+so it is large only for descriptors aligned with the predictive direction. Two
+collinear descriptors carry near-identical selectivity ratios, which is why the
+clustering step is needed alongside it.
+
+.. rubric:: References
+
+- Kvalheim, O. M. and Karstang, T. V. (1989). Interpretation of latent-variable
+  regression models. *Chemometrics and Intelligent Laboratory Systems*, 7(1-2),
+  39-51. (target projection)
+- Rajalahti, T. et al. (2009). Biomarker discovery in mass spectral profiles by
+  means of selectivity ratio plot. *Chemometrics and Intelligent Laboratory
+  Systems*, 95(1), 35-48. (selectivity ratio)
+
 Correcting the panel: the Mixed Assessor Model
 ----------------------------------------------
 

From 73077425999606910576b8d49a095b489d387a51 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 30 Jun 2026 06:06:05 +0000
Subject: [PATCH 9/9] Document the return shape of every sensory agent tool

Each @tool_spec description now carries a Returns section enumerating the
JSON keys it produces, so an importing agent knows the output contract
without inspecting a response first. The analyze tool spells out the full
discriminator structure (per_attribute, descriptors, clusters) and notes
that descriptors sharing a cluster_id cannot be told apart.
---
 src/process_improve/sensory/tools.py | 37 ++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/src/process_improve/sensory/tools.py b/src/process_improve/sensory/tools.py
index 607fcd61..8f5d2d8f 100644
--- a/src/process_improve/sensory/tools.py
+++ b/src/process_improve/sensory/tools.py
@@ -94,7 +94,11 @@ class _ReshapeInput(BaseModel):
         "wide-by-attribute layout (one column per attribute). You supply an explicit column mapping; "
         "the tool melts if needed and verifies round-trip invariants (grand mean, per-attribute and "
         "per-panelist means, and cell count are identical before and after), failing if the mapping is "
-        "wrong rather than silently corrupting the data. Run this before sensory_validate_descriptive."
+        "wrong rather than silently corrupting the data. Run this before sensory_validate_descriptive. "
+        "Returns: on success {ok: true, checks, long}, where 'long' is the reshaped rows in the "
+        "descriptive_long schema and 'checks' holds the round-trip invariants (ok, grand_mean_before, "
+        "grand_mean_after, grand_mean_diff, per_attribute_max_diff, per_panelist_max_diff, "
+        "n_cells_before, n_cells_after). On a bad mapping it returns {ok: false, errors: [str]}."
     ),
     input_model=_ReshapeInput,
     category="sensory",
@@ -157,8 +161,10 @@ class _ValidateInput(BaseModel):
     description=(
         "Validate descriptive panel data against the descriptive_long schema and a product-covariate "
         "table. Checks required columns, dtypes, score range, panel balance, and label encoding, plus "
-        "mode-specific covariate checks. Returns ok/warnings/errors, a content hash, and summary counts. "
-        "Run this before sensory_analyze_descriptive."
+        "mode-specific covariate checks. Run this before sensory_analyze_descriptive. "
+        "Returns: {ok: bool, mode, warnings: [str], errors: [str], content_hash: str, stats: object}. "
+        "'ok' gates the rest of the pipeline; 'errors' is non-empty only when ok is false, while "
+        "'warnings' (for example panel-imbalance notes) can appear even when ok is true."
     ),
     input_model=_ValidateInput,
     category="sensory",
@@ -249,7 +255,20 @@ class _AnalyzeInput(BaseModel):
         "gate, a selectivity ratio per descriptor with a permutation q-value, and collinear-cluster ids "
         "that mark proxies which cannot be separated from a genuine driver. Also returns product means "
         "with CIs and a PCA map. Refuses to run if validation fails. "
-        "(Designed/DoE mode is planned for a later release.)"
+        "(Designed/DoE mode is planned for a later release.) "
+        "Returns: on validation failure {ok: false, errors: [str], warnings: [str]}. On success "
+        "{ok: true, mode, warnings, flagged, flag_reasons, dropped, correction, mam, relate, "
+        "product_means, pca}. 'flagged'/'dropped' are panelist-id lists; 'mam' has 'scaling' and "
+        "'ftests' (as in sensory_panel_check); 'product_means' is rows of product, attribute, mean, "
+        "ci_low, ci_high; 'pca' has 'explained_variance' and 'scores'. 'relate' (observational) holds "
+        "{mode, n_components, alpha, vip, associations, discriminator}: 'vip' is rows of descriptor, vip; "
+        "'associations' is the marginal table, rows of attribute, descriptor, r, p_value, q_value, "
+        "significant. 'discriminator' (present unless disabled) holds {per_attribute, descriptors, "
+        "clusters, alpha, n_permutations, cluster_threshold}: 'per_attribute' is rows of attribute, "
+        "n_components_cv, q2_cv, rmsep_cv, predictable; 'descriptors' is rows of attribute, descriptor, "
+        "selectivity_ratio, p_value, q_value, discriminator_significant, cluster_id; 'clusters' maps "
+        "each descriptor to an integer collinear-cluster id (descriptors sharing an id cannot be told "
+        "apart, so a significant one may be a proxy for another in the same cluster)."
     ),
     input_model=_AnalyzeInput,
     category="sensory",
@@ -335,7 +354,15 @@ class _PanelCheckInput(BaseModel):
         "flagged panelists and reasons, and the Mixed Assessor Model results: each panelist's scaling "
         "coefficient beta per attribute (beta<1 compresses the scale, >1 expands it) and the MAM versus "
         "classical product-effect F-tests. With align=true, also returns the panel rescaled onto a "
-        "common scale so scale-usage differences are removed while genuine disagreement is preserved."
+        "common scale so scale-usage differences are removed while genuine disagreement is preserved. "
+        "Returns: {ok: true, scorecard, flagged, flag_reasons, mam}. 'scorecard' is one row per "
+        "panelist (panelist_id, discrimination, agreement, scale_shift, scale_spread, drift); 'flagged' "
+        "is the list of anomalous panelist ids and 'flag_reasons' maps each to its list of reasons; "
+        "'mam' has "
+        "'scaling' (rows of attribute, panelist_id, beta, offset, mean) and 'ftests' (rows of attribute, "
+        "f_product_mam, p_product_mam, f_product_classical, p_product_classical, df_product, "
+        "df_disagreement). With align=true an 'aligned_panel' (rescaled descriptive_long rows) is added. "
+        "On missing columns it returns {ok: false, errors: [str]}."
     ),
     input_model=_PanelCheckInput,
     category="sensory",