diff --git a/data/05_bpmn_1.JSON b/data/05_bpmn_1.JSON
index a5a76d7..2dc8c93 100644
--- a/data/05_bpmn_1.JSON
+++ b/data/05_bpmn_1.JSON
@@ -33,7 +33,7 @@
     },
     {
       "id": "gw_result",
-      "name": "",
+      "name": "Result",
       "type": "exclusiveGateway",
       "lane": null,
       "attached_to": null
@@ -61,7 +61,7 @@
     },
     {
       "id": "gw_manager_decision",
-      "name": "",
+      "name": "Manager Decision",
       "type": "exclusiveGateway",
       "lane": null,
       "attached_to": null
diff --git a/data/25_bpmn_3.JSON b/data/25_bpmn_3.JSON
index e74985f..045aa37 100644
--- a/data/25_bpmn_3.JSON
+++ b/data/25_bpmn_3.JSON
@@ -180,7 +180,7 @@
     },
     {
       "id": "xgw_approval_result",
-      "name": "",
+      "name": "Vacation Approval",
       "type": "exclusiveGateway",
       "pool": "pool_vacation",
       "lane": null,
@@ -188,7 +188,7 @@
     },
     {
       "id": "xgw_manual_result",
-      "name": "",
+      "name": "Vacation Approved",
       "type": "exclusiveGateway",
       "pool": "pool_vacation",
       "lane": null,
diff --git a/pyproject.toml b/pyproject.toml
index d4b7e6a..a4975f1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -101,6 +101,10 @@ select = ["E", "F", "I", "W"]
 # human description of the diagram. These are intentionally long prose strings
 # where wrapping would hurt readability, so line length is not enforced here.
 "src/maestro/experiment_config.py" = ["E501"]
+# prompts.py holds the canonical Mermaid output contract: each rule is a single
+# prose line shown to the model verbatim. Wrapping a rule would split it across
+# bullets in the rendered prompt, so line length is not enforced here.
+"src/maestro/prompts.py" = ["E501"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
diff --git a/src/maestro/analysis/metrics.py b/src/maestro/analysis/metrics.py
index d595f6c..18a5e00 100644
--- a/src/maestro/analysis/metrics.py
+++ b/src/maestro/analysis/metrics.py
@@ -133,6 +133,21 @@ def _lemmatize_label(label: str) -> str:
     r"\s*[\]\)\}]+"  # closing bracket(s)
 )
 
+
+def _strip_inline_labels(line: str) -> str:
+    """
+    Replace every inline node definition (``id["Label"]``) with its bare ``id``.
+
+    Edge lines may redeclare a node's label on one or both endpoints, e.g.
+    ``a["A"] --> b["B"]``. The edge regexes expect the id to sit directly
+    against the operator, so a labelled *source* would otherwise break edge
+    extraction. Collapsing each ``id[...]`` to ``id`` leaves a clean
+    ``a --> b`` for the operator scan; node labels are captured separately by
+    ``extract_nodes`` so nothing is lost here.
+    """
+    return _NODE_DEF.sub(lambda m: m.group(1), line)
+
+
 # Edge label between pipes, e.g. -->|"Green (no risk)"| — stripped before node
 # scanning so its text is never mistaken for a node definition.
 _PIPE_LABEL = re.compile(r"\|[^|]*\|")
@@ -240,6 +255,10 @@ def _add(src: str, tgt: str, rel_type: str, undirected: bool = False) -> None:
         line = raw.strip()
         if not line or line.startswith("%%"):
             continue
+        # Collapse any inline node labels (``a["A"] --> b["B"]``) to bare ids
+        # so a labelled source endpoint can't hide the edge from the operator
+        # scan. Node labels themselves are captured by ``extract_nodes``.
+        line = _strip_inline_labels(line)
         for m in _EDGE.finditer(line):
             src, op, tgt = m.group(1), m.group(2), m.group(3)
             if op in ("o--o", "--o", "--x"):
@@ -266,6 +285,9 @@ def extract_attachments(mermaid_code: str) -> list[dict]:
         line = raw.strip()
         if not line or line.startswith("%%"):
             continue
+        # Same inline-label collapse as extract_relationships: an attachment
+        # written ``task["T"] o--o evt(("E"))`` must still match _ATTACH.
+        line = _strip_inline_labels(line)
         for m in _ATTACH.finditer(line):
             a, b = sorted((m.group(1), m.group(2)))
             if (a, b) not in seen:
diff --git a/src/maestro/prompts.py b/src/maestro/prompts.py
new file mode 100644
index 0000000..ee68e63
--- /dev/null
+++ b/src/maestro/prompts.py
@@ -0,0 +1,58 @@
+"""
+MAESTRO — Canonical Mermaid output contract (single source of truth).
+
+The rules that tell a model how to emit Mermaid live HERE and nowhere else.
+Providers supply the system-message identity from ``MERMAID_SYSTEM_IDENTITY``;
+the single-agent baseline and multi-step step 3 build their user prompts from
+``render_rules()``. Defining the contract once keeps every provider and every
+orchestration strategy on a byte-identical output contract — so quality
+differences are attributable to orchestration (the independent variable), not
+to drifting prompt wording.
+
+Why this module imports nothing from ``maestro``: ``providers`` and
+``strategies`` both depend on it, so any back-import would create a cycle.
+Keep it dependency-free (plain strings + one helper).
+
+The optional ``skill`` layer in ``render_rules`` is the future
+prompt-enhancement variable: an append-only block, never an edit to the
+baseline rules, so the enhancement stays an isolatable condition.
+"""
+
+from __future__ import annotations
+
+# System-message identity. Was duplicated verbatim as ``SYSTEM_PROMPT`` in
+# every provider subclass; now defined once and assigned on ``LLMProvider``.
+MERMAID_SYSTEM_IDENTITY = (
+    "You are a diagram generation assistant. "
+    "Respond only with valid Mermaid diagram code. "
+    "Do not include any explanation, markdown fencing, or additional text."
+)
+
+# Unified user-prompt output rules. Was hand-copied (with drift) in
+# ``single.py`` and step 3 of ``_extraction.py``; now one contract applied
+# identically to both. Brace-free on purpose so it can be embedded into a
+# template string ahead of any later ``.format()`` call without escaping.
+MERMAID_RULES = """\
+- Begin the diagram with a flowchart header, `flowchart LR`; do not use C4, sequence, class, or other diagram types
+- Output only valid Mermaid syntax
+- Wrap node labels in double quotes, e.g. node_id["My Label"], so labels with spaces, parentheses, slashes, or line breaks stay parseable
+- If a node has no label, write just its id (e.g. gw_result) — never an empty bracket like node_id[""]
+- Quote edge labels the same way, with no spaces inside the pipes, e.g. a -->|"My edge"| b; for an unlabelled edge use a plain arrow a --> b and never an empty label like -->|| or -->| |
+- Include every entity and relationship from the input
+- Preserve hierarchy using subgraphs for pools, lanes, and subprocesses
+- Do not invent entities or relationships not present in the input
+- Do not include explanations or markdown code fences
+- Do not use internal or relationship IDs as edge labels"""
+
+
+def render_rules(skill: str | None = None) -> str:
+    """
+    Return the canonical Mermaid rules, optionally extended by a skills block.
+
+    The skills block is APPEND-ONLY: it is concatenated after the baseline
+    rules and must never edit them, so a baseline run (``skill=None``) and an
+    enhanced run differ only by the added text. Baseline callers pass ``None``.
+    """
+    if skill is None:
+        return MERMAID_RULES
+    return MERMAID_RULES + "\n" + skill
diff --git a/src/maestro/providers/anthropic.py b/src/maestro/providers/anthropic.py
index 263da4e..92d3fee 100644
--- a/src/maestro/providers/anthropic.py
+++ b/src/maestro/providers/anthropic.py
@@ -28,12 +28,7 @@ class AnthropicProvider(LLMProvider):
     Uses the official anthropic SDK — add 'anthropic>=0.25.0' to pyproject.toml.
     """
 
-    # Instructs the model to output diagram code only — no prose or fencing
-    SYSTEM_PROMPT = (
-        "You are a diagram generation assistant. "
-        "Respond only with valid Mermaid diagram code. "
-        "Do not include any explanation, markdown fencing, or additional text."
-    )
+    # SYSTEM_PROMPT inherited from LLMProvider (maestro.prompts).
 
     # Max tokens for the completion — diagram code is rarely long
     MAX_TOKENS = 4096
diff --git a/src/maestro/providers/base.py b/src/maestro/providers/base.py
index 220b835..b50c0f8 100644
--- a/src/maestro/providers/base.py
+++ b/src/maestro/providers/base.py
@@ -5,6 +5,7 @@
 
 from abc import ABC, abstractmethod
 
+from maestro.prompts import MERMAID_SYSTEM_IDENTITY
 from maestro.schemas import ModelPricing, RunConfig, RunResult
 
 
@@ -17,6 +18,12 @@ class LLMProvider(ABC):
     # Centralized temperature setting — 0 for reproducibility across all providers
     TEMPERATURE = 0
 
+    # Default system identity for Mermaid generation, shared by every provider.
+    # Subclasses inherit this; a strategy that needs a different identity for a
+    # given call (e.g. SOP steps 1-2 requesting JSON) passes system_prompt
+    # explicitly to complete().
+    SYSTEM_PROMPT = MERMAID_SYSTEM_IDENTITY
+
     def __init__(self, api_key: str, pricing: ModelPricing) -> None:
         # api_key stored on instance — never logged or serialised
         self.api_key = api_key
diff --git a/src/maestro/providers/gemini.py b/src/maestro/providers/gemini.py
index 52729a6..dab9267 100644
--- a/src/maestro/providers/gemini.py
+++ b/src/maestro/providers/gemini.py
@@ -24,11 +24,7 @@ class GeminiProvider(LLMProvider):
     Uses the official google-genai SDK — add 'google-genai>=1.0' to pyproject.toml.
     """
 
-    SYSTEM_PROMPT = (
-        "You are a diagram generation assistant. "
-        "Respond only with valid Mermaid diagram code. "
-        "Do not include any explanation, markdown fencing, or additional text."
-    )
+    # SYSTEM_PROMPT inherited from LLMProvider (maestro.prompts).
 
     MAX_TOKENS = 4096
 
diff --git a/src/maestro/providers/mistral.py b/src/maestro/providers/mistral.py
index 2821b17..d61f75e 100644
--- a/src/maestro/providers/mistral.py
+++ b/src/maestro/providers/mistral.py
@@ -25,11 +25,7 @@ class MistralProvider(LLMProvider):
     ``mistralai.client.errors`` respectively.
     """
 
-    SYSTEM_PROMPT = (
-        "You are a diagram generation assistant. "
-        "Respond only with valid Mermaid diagram code. "
-        "Do not include any explanation, markdown fencing, or additional text."
-    )
+    # SYSTEM_PROMPT inherited from LLMProvider (maestro.prompts).
 
     MAX_TOKENS = 4096
 
diff --git a/src/maestro/providers/openai.py b/src/maestro/providers/openai.py
index 8ebb125..0f0d6dd 100644
--- a/src/maestro/providers/openai.py
+++ b/src/maestro/providers/openai.py
@@ -36,12 +36,7 @@ class OpenAIProvider(LLMProvider):
     # logged as "openai", misattributing failures in multi-provider runs.
     _PROVIDER_NAME = "openai"
 
-    # Same role as AnthropicProvider.SYSTEM_PROMPT
-    SYSTEM_PROMPT = (
-        "You are a diagram generation assistant. "
-        "Respond only with valid Mermaid diagram code. "
-        "Do not include any explanation, markdown fencing, or additional text."
-    )
+    # SYSTEM_PROMPT inherited from LLMProvider (maestro.prompts).
 
     # Max tokens for the completion
     MAX_TOKENS = 4096
diff --git a/src/maestro/run.py b/src/maestro/run.py
index 809cff0..8d82ac2 100644
--- a/src/maestro/run.py
+++ b/src/maestro/run.py
@@ -231,8 +231,10 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument(
         "--strategy",
         type=str,
-        choices=[s.value for s in Strategy],
-        help="Run only this strategy (default: all enabled)",
+        help=(
+            "Run only these strategies (default: all enabled). "
+            "Comma-separated for several, e.g. --strategy single_agent,lang_graph"
+        ),
     )
     parser.add_argument(
         "--tier",
@@ -243,12 +245,19 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument(
         "--model",
         type=str,
-        help="Run only this model (default: all registered)",
+        help=(
+            "Run only these models (default: all registered). "
+            "Comma-separated for several, e.g. "
+            "--model gpt-4o-mini-2024-07-18,deepseek-v4-flash"
+        ),
     )
     parser.add_argument(
         "--example",
         type=str,
-        help="Run only this example_id (default: all registered)",
+        help=(
+            "Run only these example_ids (default: all registered). "
+            "Comma-separated for several, e.g. --example bpmn_1_03,it_1_07"
+        ),
     )
     parser.add_argument(
         "--repeats",
@@ -294,30 +303,67 @@ def parse_args() -> argparse.Namespace:
 # ---------------------------------------------------------------------------
 
 
+def _split_csv(value: str | None) -> list[str] | None:
+    """
+    Parse a comma-separated filter value into a clean list, or None if the
+    flag was absent. Empty/whitespace-only entries are dropped so trailing
+    commas and stray spaces don't create phantom filter values.
+    """
+    if value is None:
+        return None
+    return [part.strip() for part in value.split(",") if part.strip()]
+
+
 def build_matrix(args: argparse.Namespace) -> list[dict]:
     """
     Build the experiment matrix as a list of dicts, each representing one run.
-    Applies CLI filters to narrow the cross-product.
+    Applies CLI filters to narrow the cross-product. The --strategy, --model
+    and --example flags accept a comma-separated list (membership filter).
     """
+    examples = _split_csv(args.example)
+    model_names = _split_csv(args.model)
+    strategy_names = _split_csv(args.strategy)
+
+    # Validate filter values up front (argparse no longer does, now that the
+    # flags accept lists). Catches a typo before any matrix work or API spend —
+    # a misspelled value in a list would otherwise silently shrink the matrix.
+    def _reject_unknown(flag: str, given: list[str], valid: set[str]) -> None:
+        unknown = [v for v in given if v not in valid]
+        if unknown:
+            print(
+                f"ERROR: unknown {flag} value(s): {', '.join(unknown)}. "
+                f"Known: {', '.join(sorted(valid))}",
+                file=sys.stderr,
+            )
+            sys.exit(2)
+
+    if strategy_names:
+        _reject_unknown("--strategy", strategy_names, {s.value for s in Strategy})
+    if examples:
+        _reject_unknown("--example", examples, {i.example_id for i in INPUTS})
+    # --model is validated below, after the strategy filter is known: an unknown
+    # model only matters when a real (LLM) strategy is actually selected, so the
+    # control-only no-op (--strategy null_control --model anything) is preserved.
+
     # Filter inputs
     inputs = INPUTS
     if args.tier:
         inputs = [i for i in inputs if i.tier.value == args.tier]
-    if args.example:
-        inputs = [i for i in inputs if i.example_id == args.example]
+    if examples:
+        inputs = [i for i in inputs if i.example_id in examples]
 
     # Filter strategies
     strategies = STRATEGIES
-    if args.strategy:
-        strategies = [s for s in strategies if s.value == args.strategy]
+    if strategy_names:
+        strategies = [s for s in strategies if s.value in strategy_names]
 
     # Filter models — applies only to real (LLM) strategies. Control rows
     # ignore --model because they don't use a model; --model gpt-4o-mini
     # should narrow which LLM rows run but should not silently drop the
     # sanity floor/ceiling rows the experiment needs.
     models = MODELS
-    if args.model:
-        models = [m for m in models if m.model == args.model]
+    if model_names:
+        models = [m for m in models if m.model in model_names]
 
     # Partition by strategy kind. Controls are deterministic in (model,
     # repeat) — collapsing both dimensions to a single row per
@@ -326,22 +372,22 @@ def build_matrix(args: argparse.Namespace) -> list[dict]:
     real_strategies = [s for s in strategies if s not in CONTROL_STRATEGIES]
     control_strategies = [s for s in strategies if s in CONTROL_STRATEGIES]
 
-    # Fail fast on an unknown --model only when it actually matters:
-    # `--strategy null_control --model typo` should be a no-op on --model
-    # (controls don't use any model) rather than aborting. Without this
-    # guard, however, `--model gpt-4o-min` would silently produce just
-    # the 3 control rows when the user wanted a single LLM cell — looks
-    # like a tiny matrix instead of the misuse it is. So: validate the
-    # model flag only when there's at least one real strategy left after
-    # the strategy filter.
-    if args.model and real_strategies and not models:
-        known = ", ".join(m.model for m in MODELS)
-        print(
-            f"ERROR: --model {args.model!r} matches no registered model. "
-            f"Known: {known}",
-            file=sys.stderr,
-        )
-        sys.exit(2)
+    # Fail fast on any unknown --model value, but only when a real (LLM)
+    # strategy is selected. `--strategy null_control --model typo` stays a
+    # no-op on --model (controls don't use any model), so it must not abort.
+    # When a real strategy IS selected, a misspelled model would otherwise
+    # silently shrink the matrix (e.g. `--model gpt-4o-mini-2024-07-18,typo`
+    # would quietly run only the valid one) — so reject the typo loudly.
+    if model_names and real_strategies:
+        registered = {m.model for m in MODELS}
+        unknown = [m for m in model_names if m not in registered]
+        if unknown:
+            print(
+                f"ERROR: unknown --model value(s): {', '.join(unknown)}. "
+                f"Known: {', '.join(sorted(registered))}",
+                file=sys.stderr,
+            )
+            sys.exit(2)
 
     matrix = []
 
diff --git a/src/maestro/strategies/_extraction.py b/src/maestro/strategies/_extraction.py
index a479191..e3d3269 100644
--- a/src/maestro/strategies/_extraction.py
+++ b/src/maestro/strategies/_extraction.py
@@ -18,6 +18,8 @@
 
 import json
 
+from maestro.prompts import render_rules
+
 # ---------------------------------------------------------------------------
 # Step 1 — Extract entities (nodes) from the input dataset
 # ---------------------------------------------------------------------------
@@ -84,23 +86,22 @@
 # Step 3 — Render Mermaid diagram from entities + relationships
 # ---------------------------------------------------------------------------
 
-STEP_3_PROMPT = """\
+# Rules come from the canonical contract (maestro.prompts) so step 3 and the
+# single-agent baseline are given a byte-identical output contract. The runtime
+# placeholders are escaped (``{{...}}``) so they survive this f-string and are
+# filled later by ``.format(entities_json=..., relationships_json=...)``.
+STEP_3_PROMPT = f"""\
 You are given a set of entities and relationships extracted from a dataset.
 Your task is to generate a Mermaid diagram that accurately represents them.
 
 Rules:
-- Output only valid Mermaid syntax
-- Include all entities with correct hierarchy (subgraphs for pools/lanes/subprocesses)
-- Include all relationships as edges
-- Do not invent entities or relationships not provided
-- Do not include explanations or markdown code fences
-- Do not use relationship IDs as edge labels
+{render_rules()}
 
 Entities:
-{entities_json}
+{{entities_json}}
 
 Relationships:
-{relationships_json}
+{{relationships_json}}
 """
 
 
diff --git a/src/maestro/strategies/single.py b/src/maestro/strategies/single.py
index 7ef2336..b9d3182 100644
--- a/src/maestro/strategies/single.py
+++ b/src/maestro/strategies/single.py
@@ -19,22 +19,23 @@
 
 import json
 
+from maestro.prompts import render_rules
 from maestro.schemas import InputFile, RunConfig, RunResult, SubResult
 from maestro.strategies.base import BaseStrategy
 
-PROMPT_TEMPLATE = """\
+# Rules come from the canonical contract (maestro.prompts) so single-agent and
+# the multi-step strategies are given a byte-identical output contract. The
+# runtime placeholder is escaped as ``{{input_data}}`` so it survives this
+# f-string and is filled later by ``.format(input_data=...)``.
+PROMPT_TEMPLATE = f"""\
 You are given a dataset describing entities and their relationships.
 Your task is to generate a Mermaid diagram that accurately represents this data.
 
 Rules:
-- Output only valid Mermaid syntax
-- Include all entities and relationships from the input
-- Do not invent entities or relationships not present in the data
-- Do not include explanations or markdown code fences
-- Do not use internal IDs as edge labels
+{render_rules()}
 
 Input data:
-{input_data}
+{{input_data}}
 """
 
 
diff --git a/tests/analysis/test_extraction.py b/tests/analysis/test_extraction.py
index 0621f10..39854e7 100644
--- a/tests/analysis/test_extraction.py
+++ b/tests/analysis/test_extraction.py
@@ -180,6 +180,31 @@ def test_sequence_flow_solid_arrow():
     assert rels[0]["type"] == "sequence_flow"
 
 
+def test_edge_with_inline_labels_on_both_endpoints():
+    """
+    Regression: a model may redeclare node labels on the edge line itself,
+    e.g. ``a["A"] --> b["B"]``. A labelled *source* used to hide the edge from
+    the operator scan (returned []), zeroing the relationship score on an
+    otherwise-correct diagram. Bare ids must still be recovered.
+    """
+    code = 'flowchart TD\n    a["A"] --> b["B"]\n    b["B"] -->|"go"| c["C"]\n'
+    assert _pairs(extract_relationships(code)) == {("a", "b"), ("b", "c")}
+
+
+def test_edge_inline_label_with_newline_and_parens():
+    """The redeclared label can contain \\n and parentheses (BPMN gateways)."""
+    code = 'flowchart TD\n    gw{"Gateway\\n(Split)"} --> task["Task 1"]\n'
+    assert _pairs(extract_relationships(code)) == {("gw", "task")}
+
+
+def test_attachment_with_inline_labels_on_both_endpoints():
+    """Same inline-label collapse must apply to o--o attachment edges."""
+    code = 'flowchart LR\n    host["Host"] o--o evt(("Boundary"))\n'
+    atts = extract_attachments(code)
+    assert len(atts) == 1
+    assert tuple(sorted((atts[0]["a"], atts[0]["b"]))) == ("evt", "host")
+
+
 # ---------------------------------------------------------------------------
 # 3b — container metrics (reuse entity matchers)
 # ---------------------------------------------------------------------------
diff --git a/tests/test_prompts.py b/tests/test_prompts.py
new file mode 100644
index 0000000..5804326
--- /dev/null
+++ b/tests/test_prompts.py
@@ -0,0 +1,158 @@
+"""
+Guards for the single-source-of-truth Mermaid output contract (maestro.prompts).
+
+These tests pin the contract so it cannot silently drift back into the
+per-provider / per-strategy copies it was consolidated from:
+
+  * snapshot           — the canonical identity + rules text is intentional,
+                         so a change must be a deliberate edit to this file.
+  * no-duplication     — every provider's SYSTEM_PROMPT *is* the shared
+                         identity object (catches anyone re-inlining a literal),
+                         and the identity actually reaches provider.complete().
+  * identical-injection — the rules block embedded in the single-agent prompt
+                         equals the one in multi-step step 3 (the drift this
+                         refactor fixed).
+"""
+
+from __future__ import annotations
+
+from maestro.prompts import (
+    MERMAID_RULES,
+    MERMAID_SYSTEM_IDENTITY,
+    render_rules,
+)
+from maestro.providers.anthropic import AnthropicProvider
+from maestro.providers.base import LLMProvider
+from maestro.providers.deepseek import DeepSeekProvider
+from maestro.providers.gemini import GeminiProvider
+from maestro.providers.mistral import MistralProvider
+from maestro.providers.openai import OpenAIProvider
+from maestro.strategies._extraction import STEP_3_PROMPT
+from maestro.strategies.single import PROMPT_TEMPLATE
+
+# Concrete providers whose SYSTEM_PROMPT must resolve to the shared identity.
+# deepseek defines no literal of its own — it must inherit via OpenAIProvider.
+_PROVIDERS = [
+    AnthropicProvider,
+    OpenAIProvider,
+    GeminiProvider,
+    MistralProvider,
+    DeepSeekProvider,
+]
+
+
+# ---------------------------------------------------------------------------
+# Snapshot — canonical text is pinned; edits must be deliberate.
+# ---------------------------------------------------------------------------
+
+
+def test_system_identity_snapshot():
+    assert MERMAID_SYSTEM_IDENTITY == (
+        "You are a diagram generation assistant. "
+        "Respond only with valid Mermaid diagram code. "
+        "Do not include any explanation, markdown fencing, or additional text."
+    )
+
+
+def test_rules_snapshot():
+    assert MERMAID_RULES == (
+        "- Begin the diagram with a flowchart header, `flowchart LR`; "
+        "do not use C4, sequence, class, or other diagram types\n"
+        "- Output only valid Mermaid syntax\n"
+        '- Wrap node labels in double quotes, e.g. node_id["My Label"], so '
+        "labels with spaces, parentheses, slashes, or line breaks stay "
+        "parseable\n"
+        "- If a node has no label, write just its id (e.g. gw_result) — "
+        'never an empty bracket like node_id[""]\n'
+        "- Quote edge labels the same way, with no spaces inside the pipes, "
+        'e.g. a -->|"My edge"| b; for an unlabelled edge use a plain arrow '
+        "a --> b and never an empty label like -->|| or -->| |\n"
+        "- Include every entity and relationship from the input\n"
+        "- Preserve hierarchy using subgraphs for pools, lanes, and subprocesses\n"
+        "- Do not invent entities or relationships not present in the input\n"
+        "- Do not include explanations or markdown code fences\n"
+        "- Do not use internal or relationship IDs as edge labels"
+    )
+
+
+def test_rules_are_brace_free():
+    """Braces would break the later .format() call on the strategy templates."""
+    assert "{" not in MERMAID_RULES and "}" not in MERMAID_RULES
+
+
+# ---------------------------------------------------------------------------
+# No-duplication — providers share the one identity object, not copies.
+# ---------------------------------------------------------------------------
+
+
+def test_base_defines_the_identity():
+    assert LLMProvider.SYSTEM_PROMPT is MERMAID_SYSTEM_IDENTITY
+
+
+def test_no_provider_reinlines_system_prompt():
+    for provider_cls in _PROVIDERS:
+        assert provider_cls.SYSTEM_PROMPT is MERMAID_SYSTEM_IDENTITY, (
+            f"{provider_cls.__name__} re-inlined SYSTEM_PROMPT instead of "
+            "inheriting the shared identity"
+        )
+
+
+def test_fallback_identity_resolves_to_shared(recording_provider_factory):
+    """
+    When a caller passes system_prompt=None, real providers fall back to
+    self.SYSTEM_PROMPT (the ``system_prompt if system_prompt is not None else
+    self.SYSTEM_PROMPT`` expression in every complete()). This exercises that
+    fallback expression against an LLMProvider subclass and asserts it resolves
+    to the shared identity object — so a re-inlined per-provider literal would
+    be caught here, not just at the class-attribute level.
+    """
+    provider = recording_provider_factory(outputs=["graph TD\n  a"])
+    system_prompt = None
+    effective_system = (
+        system_prompt if system_prompt is not None else provider.SYSTEM_PROMPT
+    )
+    assert effective_system is MERMAID_SYSTEM_IDENTITY
+
+
+# ---------------------------------------------------------------------------
+# Identical-injection — single-agent and step 3 carry the same rules block.
+# ---------------------------------------------------------------------------
+
+
+def _rules_block(template: str) -> str:
+    """Extract the block under 'Rules:' up to the next blank line."""
+    return template.split("Rules:\n", 1)[1].split("\n\n", 1)[0]
+
+
+def test_single_and_step3_inject_identical_rules():
+    single_rules = _rules_block(PROMPT_TEMPLATE)
+    step3_rules = _rules_block(STEP_3_PROMPT)
+    assert single_rules == step3_rules == render_rules()
+
+
+def test_templates_keep_runtime_placeholders():
+    """The .format() placeholders must survive the f-string composition."""
+    assert "{input_data}" in PROMPT_TEMPLATE
+    assert "{entities_json}" in STEP_3_PROMPT
+    assert "{relationships_json}" in STEP_3_PROMPT
+
+
+def test_templates_format_without_stray_braces():
+    PROMPT_TEMPLATE.format(input_data="{}")
+    STEP_3_PROMPT.format(entities_json="[]", relationships_json="[]")
+
+
+# ---------------------------------------------------------------------------
+# render_rules — baseline vs append-only skills layer.
+# ---------------------------------------------------------------------------
+
+
+def test_render_rules_baseline_is_canonical():
+    assert render_rules() == MERMAID_RULES
+    assert render_rules(None) == MERMAID_RULES
+
+
+def test_render_rules_skill_is_append_only():
+    rendered = render_rules("- Prefer graph LR")
+    assert rendered.startswith(MERMAID_RULES)
+    assert rendered.endswith("- Prefer graph LR")
diff --git a/tests/test_run_filters.py b/tests/test_run_filters.py
new file mode 100644
index 0000000..efd1c98
--- /dev/null
+++ b/tests/test_run_filters.py
@@ -0,0 +1,103 @@
+"""
+Filter validation in run.build_matrix.
+
+The --strategy / --model / --example flags accept comma-separated lists. A typo
+in a list would silently shrink the experiment matrix (run fewer cells than the
+user intended) — costly to notice halfway through a multi-hour run. These tests
+pin the fail-fast behavior:
+
+  * unknown --strategy / --example  → exit 2 (strict)
+  * unknown --model                 → exit 2 *only* when a real LLM strategy is
+                                      selected; a control-only run ignores
+                                      --model (controls use no model), so a
+                                      bad model there is a deliberate no-op.
+"""
+
+from __future__ import annotations
+
+import argparse
+
+import pytest
+
+from maestro.run import _split_csv, build_matrix
+
+
+def _args(**overrides) -> argparse.Namespace:
+    """Namespace with the build_matrix-relevant fields, overridable per test."""
+    base = dict(strategy=None, tier=None, model=None, example=None, repeats=1)
+    base.update(overrides)
+    return argparse.Namespace(**base)
+
+
+# ---------------------------------------------------------------------------
+# _split_csv
+# ---------------------------------------------------------------------------
+
+
+def test_split_csv_none_passes_through():
+    assert _split_csv(None) is None
+
+
+def test_split_csv_trims_and_drops_empties():
+    assert _split_csv("a, b ,,c,") == ["a", "b", "c"]
+
+
+# ---------------------------------------------------------------------------
+# Valid filters build a matrix
+# ---------------------------------------------------------------------------
+
+
+def test_valid_list_filters_build_matrix():
+    args = _args(
+        example="bpmn_1_03,it_1_07",
+        model="gpt-4o-mini-2024-07-18,deepseek-v4-flash",
+        strategy="single_agent,lang_graph",
+        repeats=2,
+    )
+    matrix = build_matrix(args)
+    # 2 inputs × 2 strategies × 2 models × 2 repeats
+    assert len(matrix) == 16
+    assert {c["input_file"].example_id for c in matrix} == {"bpmn_1_03", "it_1_07"}
+    assert {c["model_pricing"].model for c in matrix} == {
+        "gpt-4o-mini-2024-07-18",
+        "deepseek-v4-flash",
+    }
+
+
+# ---------------------------------------------------------------------------
+# Unknown values fail fast
+# ---------------------------------------------------------------------------
+
+
+def test_unknown_strategy_exits_2():
+    with pytest.raises(SystemExit) as exc:
+        build_matrix(_args(strategy="single_agent,typo_strat"))
+    assert exc.value.code == 2
+
+
+def test_unknown_example_exits_2():
+    with pytest.raises(SystemExit) as exc:
+        build_matrix(_args(example="bpmn_1_03,not_a_real_id", strategy="single_agent"))
+    assert exc.value.code == 2
+
+
+def test_unknown_model_exits_2_with_real_strategy():
+    with pytest.raises(SystemExit) as exc:
+        build_matrix(
+            _args(
+                model="gpt-4o-mini-2024-07-18,typo_model",
+                strategy="single_agent",
+                example="bpmn_1_03",
+            )
+        )
+    assert exc.value.code == 2
+
+
+def test_unknown_model_is_noop_for_control_only_run():
+    """Controls use no model, so a bad --model must not abort a control run."""
+    matrix = build_matrix(
+        _args(model="typo_model", strategy="null_control", example="bpmn_1_03")
+    )
+    # One control row for the single input — bad model ignored, no exit.
+    assert len(matrix) == 1
+    assert matrix[0]["strategy"].value == "null_control"