kgdunn · kgdunn · Jul 1, 2026 · Jul 1, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,18 @@ those changes.
 
 ## [Unreleased]
 
+## [1.52.0] - 2026-07-01
+
+### Added
+
+- The panel scorecard now reports two-sided outlier bands per panelist:
+  `scale_use_band` and `offset_band`, each `low` / `normal` / `high` (Tukey
+  1.5*IQR fences). They surface both tails symmetrically (a compressor is
+  reported like an expander) and are exposed through `sensory_panel_check`
+  (scorecard rows) and `sensory_analyze_descriptive` (a `scale_bands` block), so
+  a front-end can colour or label a panel map from a stable classification
+  instead of re-deriving thresholds from the raw numbers.
+
 ## [1.51.0] - 2026-06-30
 
 ### Added
@@ -2297,7 +2309,8 @@ this entry records them together.
 - Reworked the README with a sharper value proposition and a
   "Why not scikit-learn?" comparison table.
 
-[Unreleased]: https://github.com/kgdunn/process-improve/compare/v1.51.0...HEAD
+[Unreleased]: https://github.com/kgdunn/process-improve/compare/v1.52.0...HEAD
+[1.52.0]: https://github.com/kgdunn/process-improve/compare/v1.51.0...v1.52.0
 [1.51.0]: https://github.com/kgdunn/process-improve/compare/v1.50.0...v1.51.0
 [1.50.0]: https://github.com/kgdunn/process-improve/compare/v1.49.1...v1.50.0
 [1.49.1]: https://github.com/kgdunn/process-improve/compare/v1.49.0...v1.49.1

diff --git a/CITATION.cff b/CITATION.cff
@@ -12,8 +12,8 @@ authors:
 repository-code: "https://github.com/kgdunn/process-improve"
 url: "https://kgdunn.github.io/process-improve/"
 license: MIT
-version: 1.51.0
-date-released: "2026-06-30"
+version: 1.52.0
+date-released: "2026-07-01"
 keywords:
   - chemometrics
   - multivariate analysis

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "process-improve"
-version = "1.51.0"
+version = "1.52.0"
 description = 'Designed Experiments; Latent Variables (PCA, PLS, multivariate methods with missing data); Process Monitoring; Batch data analysis.'
 readme = "README.md"
 license = "MIT"

diff --git a/src/process_improve/sensory/panel.py b/src/process_improve/sensory/panel.py
@@ -51,7 +51,8 @@ class PanelScorecard:
     table : pandas.DataFrame
         One row per panelist, indexed by ``panelist_id``, with the columns
         ``discrimination``, ``agreement``, ``scale_shift``, ``scale_spread``,
-        and ``drift``.
+        ``drift``, and the two-sided outlier bands ``scale_use_band`` and
+        ``offset_band`` (each ``"low"`` / ``"normal"`` / ``"high"``).
     flagged : list of str
         Panelist ids flagged as anomalous.
     reasons : dict
@@ -97,6 +98,33 @@ def _low_tail_outliers(values: pd.Series) -> set[str]:
     return {str(clean.index[i]) for i in indices if arr[i] < median}
 
 
+def _tail_bands(values: pd.Series) -> pd.Series:
+    """Label every entry of ``values`` as ``"low"``, ``"normal"`` or ``"high"``.
+
+    Two-sided Tukey fences on the non-missing values decide the label: under
+    ``Q1 - 1.5 * IQR`` is ``"low"``, over ``Q3 + 1.5 * IQR`` is ``"high"``. Missing
+    values stay ``"normal"``, as does every entry when fewer than
+    ``_MIN_PANELISTS_FOR_ESD`` values are present or the IQR is not positive. Both
+    tails are reported alike, so a compressor surfaces just like an expander, and
+    a caller (for example a front-end colouring a panel map) can use the label
+    instead of re-deriving thresholds. The fence is deliberately looser than the
+    ESD test used for dropping: scale use and offset are corrected by alignment
+    rather than dropped, so the aim is to surface the genuine tails.
+    """
+    labels = pd.Series("normal", index=values.index, dtype=object)
+    observed = values.dropna()
+    if observed.size >= _MIN_PANELISTS_FOR_ESD:
+        lower_q = float(observed.quantile(0.25))
+        upper_q = float(observed.quantile(0.75))
+        spread = upper_q - lower_q
+        if spread > 0:
+            below = observed < lower_q - 1.5 * spread
+            above = observed > upper_q + 1.5 * spread
+            labels.loc[observed.index[below]] = "low"
+            labels.loc[observed.index[above]] = "high"
+    return labels
+
+
 def panel_scorecard(panel: pd.DataFrame) -> PanelScorecard:
     """Score each panelist and flag anomalies.
 
@@ -174,6 +202,12 @@ def panel_scorecard(panel: pd.DataFrame) -> PanelScorecard:
     table = pd.DataFrame.from_dict(records, orient="index")
     table.index.name = "panelist_id"
 
+    # Two-sided outlier band per scale metric, so a caller can colour / label
+    # a panelist without re-deriving thresholds. Scale use and offset are
+    # correctable by alignment, so they are reported as bands here, not flagged.
+    table["scale_use_band"] = _tail_bands(table["scale_spread"])
+    table["offset_band"] = _tail_bands(table["scale_shift"])
+
     # --- Flagging ------------------------------------------------------
     # Flag only the two axes that threaten product validity: a panelist who
     # disagrees with the panel, or who does not separate the products. Scale

diff --git a/src/process_improve/sensory/tools.py b/src/process_improve/sensory/tools.py
@@ -257,8 +257,10 @@ class _AnalyzeInput(BaseModel):
         "with CIs and a PCA map. Refuses to run if validation fails. "
         "(Designed/DoE mode is planned for a later release.) "
         "Returns: on validation failure {ok: false, errors: [str], warnings: [str]}. On success "
-        "{ok: true, mode, warnings, flagged, flag_reasons, dropped, correction, mam, relate, "
-        "product_means, pca}. 'flagged'/'dropped' are panelist-id lists; 'mam' has 'scaling' and "
+        "{ok: true, mode, warnings, flagged, flag_reasons, scale_bands, dropped, correction, mam, relate, "
+        "product_means, pca}. 'flagged'/'dropped' are panelist-id lists; 'scale_bands' is rows of "
+        "panelist_id, scale_use_band, offset_band (each 'low'/'normal'/'high') for colouring a panel map; "
+        "'mam' has 'scaling' and "
         "'ftests' (as in sensory_panel_check); 'product_means' is rows of product, attribute, mean, "
         "ci_low, ci_high; 'pca' has 'explained_variance' and 'scores'. 'relate' (observational) holds "
         "{mode, n_components, alpha, vip, associations, discriminator}: 'vip' is rows of descriptor, vip; "
@@ -307,6 +309,9 @@ def sensory_analyze_descriptive(spec: _AnalyzeInput) -> dict:
             "warnings": validated.warnings,
             "flagged": result.panel.flagged,
             "flag_reasons": result.panel.reasons,
+            "scale_bands": result.panel.table[["scale_use_band", "offset_band"]]
+            .reset_index()
+            .to_dict(orient="records"),
             "dropped": result.dropped,
             "correction": result.correction,
             "mam": {
@@ -356,7 +361,11 @@ class _PanelCheckInput(BaseModel):
         "classical product-effect F-tests. With align=true, also returns the panel rescaled onto a "
         "common scale so scale-usage differences are removed while genuine disagreement is preserved. "
         "Returns: {ok: true, scorecard, flagged, flag_reasons, mam}. 'scorecard' is one row per "
-        "panelist (panelist_id, discrimination, agreement, scale_shift, scale_spread, drift); 'flagged' "
+        "panelist (panelist_id, discrimination, agreement, scale_shift, scale_spread, drift, plus the "
+        "two-sided outlier bands scale_use_band and offset_band, each 'low' / 'normal' / 'high'); use the "
+        "bands to colour or label a panel map (low/high scale_use_band = compresses/expands the range; "
+        "low/high offset_band = rates consistently low/high) instead of re-deriving thresholds, and only "
+        "call out the non-normal ones. 'flagged' "
         "is the list of anomalous panelist ids and 'flag_reasons' maps each to its list of reasons; "
         "'mam' has "
         "'scaling' (rows of attribute, panelist_id, beta, offset, mean) and 'ftests' (rows of attribute, "

diff --git a/tests/test_sensory.py b/tests/test_sensory.py
@@ -179,6 +179,61 @@ def test_scorecard_clean_panel_has_no_flags():
     assert card.flagged == []
 
 
+def _band_panel():
+    """Return a synthetic panel: eight unit-gain raters (N0 biased high), an expander, and a compressor."""
+    rng = np.random.default_rng(0)
+    midpoint = 5.0
+    cells = [(f"P{i}", f"A{j}") for i in range(6) for j in range(4)]
+    # Draw the latent product/attribute intensities first, before any noise draws.
+    latent = {cell: rng.uniform(2, 8) for cell in cells}
+    # Gain 1.0 is typical; STR expands the range, CMP compresses it.
+    multipliers = {**{f"N{k}": 1.0 for k in range(8)}, "STR": 3.0, "CMP": 0.12}
+    records = []
+    for rater, multiplier in multipliers.items():
+        bias = 0.0
+        if rater == "N0":
+            bias = 3.0  # N0 rates systematically high
+        for product, attribute in cells:
+            noise = rng.normal(0, 0.2)
+            raw = midpoint + bias + multiplier * (latent[(product, attribute)] - midpoint) + noise
+            record = {
+                "panelist_id": rater,
+                "session": 1,
+                "product": product,
+                "attribute": attribute,
+                "replicate": 1,
+                "score": float(np.clip(raw, 0, 10)),
+            }
+            records.append(record)
+    return pd.DataFrame(records)
+
+
+def test_tail_bands_guards_and_outliers():
+    """Check the too-few-values guard, the zero-IQR guard, and a plain high-side outlier."""
+    from process_improve.sensory.panel import _tail_bands
+    # Fewer panelists than the outlier-test minimum -> everyone normal.
+    assert set(_tail_bands(pd.Series([1.0, 5.0, 9.0], index=list("abc")))) == {"normal"}
+    # Zero IQR (Q1 == Q3 because most values tie) -> everyone normal, even the odd one out.
+    assert set(_tail_bands(pd.Series([2.0] * 7 + [5.0], index=list("abcdefgh")))) == {"normal"}
+    # A clear high-side outlier is banded 'high'.
+    vals = pd.Series([1.0, 1.1, 0.9, 1.05, 0.95, 1.02, 0.98, 5.0], index=list("abcdefgh"))
+    assert _tail_bands(vals)["h"] == "high"
+
+
+def test_scorecard_reports_two_sided_scale_bands():
+    """The scorecard bands the expander, the compressor and the high rater on the expected side."""
+    table = panel_scorecard(_band_panel()).table
+    for column in ("scale_use_band", "offset_band"):
+        assert column in table.columns
+        assert set(table[column].unique()) <= {"low", "normal", "high"}
+    # Scale use puts the expander and the compressor on opposite sides while a
+    # typical panelist stays normal; the systematic high rater shows up on offset.
+    assert table.at["STR", "scale_use_band"] == "high"
+    assert table.at["CMP", "scale_use_band"] == "low"
+    assert table.at["N1", "scale_use_band"] == "normal"
+    assert table.at["N0", "offset_band"] == "high"
+
+
 def test_dropping_panelist_changes_means():
     validated = validate_descriptive(_panel(), _obs(), mode="observational")
     kept = analyze_descriptive(validated, drop_panelists=None, discriminator=False)
@@ -554,3 +609,8 @@ def test_tool_analyze_exposes_correction_and_mam():
     assert out["correction"] == "align"
     ftest = out["mam"]["ftests"][0]
     assert ftest["f_product_mam"] > ftest["f_product_classical"]
+    # The scale-use / offset bands are exposed for the front-end to colour from.
+    bands = out["scale_bands"]
+    assert bands
+    assert {"panelist_id", "scale_use_band", "offset_band"} <= set(bands[0])
+    assert all(b["scale_use_band"] in {"low", "normal", "high"} for b in bands)