diff --git a/README.md b/README.md
index 53bcd51..542223d 100644
--- a/README.md
+++ b/README.md
@@ -231,13 +231,13 @@ NicNodePolicy (await each), all remaining CRs in one batch (controllers
 reconcile concurrently), then verify every manifest reached a terminal state.
 Example workload manifests (`*example*`) are **not** applied by `l8k deploy` —
 they're fixtures consumed by `l8k validate --connectivity` or
-`l8k deploy --verify` for the data-plane phase. It auto-prefers
+`l8k deploy --test-connectivity` for the data-plane phase. It auto-prefers
 `<dir>/network-operator/` (the layout `l8k generate` produces) and falls back
 to `<dir>` itself. `--dry-run` does a server-side dry run. `--deploy-timeout`
 caps the whole apply+reconcile phase end-to-end (e.g. `--deploy-timeout 90m`);
 without it, deploy polls indefinitely — right for SR-IOV on large clusters
-where reconciliation can take an hour. `--verify` chains the connectivity
-matrix straight after a successful apply.
+where reconciliation can take an hour. `--test-connectivity` chains the
+connectivity matrix straight after a successful apply.
 
 Verify the deployment end-to-end:
 
@@ -262,7 +262,7 @@ a per-pair cross-rail canary. The matrix is on by default
 (`--connectivity=false` to skip), runs concurrent pings capped at 16, and
 cleans up the test DaemonSet unless `--keep` is set.
 
-A self-contained HTML report lands at `<deployment-files>/verify-report.html`
+A self-contained HTML report lands at `<deployment-files>/k8s-launch-kit-validation-report.html`
 by default (override with `--report-path`, disable with `--report-path=-`).
 The report has: header (l8k version, kubeconfig context, API-server version),
 profile, **Node groups** (per-`clusterConfig[]` entry with east-west / north-
diff --git a/pkg/cmd/deploy.go b/pkg/cmd/deploy.go
index 3a2392f..3e31eb2 100644
--- a/pkg/cmd/deploy.go
+++ b/pkg/cmd/deploy.go
@@ -40,10 +40,11 @@ var (
 	// until every manifest reaches a terminal state or the user
 	// cancels.
 	deployTimeout time.Duration
-	// deployVerify chains a `l8k validate --connectivity` run right
-	// after a successful deploy. Off by default; opt-in for
+	// deployTestConnectivity chains the data-plane connectivity
+	// matrix (the same one `l8k validate --connectivity` runs)
+	// right after a successful deploy. Off by default; opt-in for
 	// pipelines that want end-to-end verification in one command.
-	deployVerify bool
+	deployTestConnectivity bool
 )
 
 // DefaultDeploymentDir is the default directory `l8k generate` writes
@@ -148,7 +149,7 @@ is used as the manifest directory.`,
 			uiOutput.Success("Deployment completed")
 		}
 
-		if deployVerify && !dryRunFlag {
+		if deployTestConnectivity && !dryRunFlag {
 			// Pipeline the connectivity matrix straight after a
 			// successful apply. We deliberately do NOT re-run the
 			// version check here — deploy just landed manifests
@@ -157,16 +158,15 @@ is used as the manifest directory.`,
 			matrix, err := connectivity.RunMatrix(ctx, k8sClient, restConfig, uiOutput, connectivity.Options{
 				ManifestDir: manifestDir,
 				Timeout:     0, // RunMatrix defaults to 5m
-				PingCount:   0, // RunMatrix defaults to 3
 			})
 			if err != nil {
 				exitWithError(apperrors.NewClusterError(
-					"deploy succeeded but --verify failed",
+					"deploy succeeded but --test-connectivity failed",
 					err,
 					"Inspect the test DaemonSet via --keep + kubectl get pods",
 				), outputFormat)
 			}
-			if matrix != nil && (matrix.Summary.Failed > 0 || matrix.Summary.ExecErrors > 0) {
+			if matrix != nil && matrix.Summary.Failed > 0 {
 				exitWithError(apperrors.NewDeploymentError(
 					fmt.Sprintf("connectivity matrix failed: %d/%d passed", matrix.Summary.Passed, matrix.Summary.TotalTests),
 					nil,
@@ -201,11 +201,11 @@ func init() {
 	deployCmd.Flags().StringVar(&deploymentFiles, "deployment-files", DefaultDeploymentDir, "Directory containing the manifests to apply")
 	deployCmd.Flags().BoolVar(&dryRunFlag, "dry-run", false, "Preview the deployment via server-side dry-run without persisting changes")
 	deployCmd.Flags().DurationVar(&deployTimeout, "deploy-timeout", 0, "Maximum end-to-end wall-clock budget for the deploy phase (e.g. 45m, 2h). 0 (the default) means no deadline; the deploy polls until every manifest reaches a terminal state. Useful for matching a maintenance window when SR-IOV reconciliation on a large cluster can take an hour or more.")
-	deployCmd.Flags().BoolVar(&deployVerify, "verify", false, "After a successful deploy, run the connectivity matrix (apply example DaemonSet → wait Ready → ping matrix → cleanup) to verify the data plane end-to-end.")
+	deployCmd.Flags().BoolVar(&deployTestConnectivity, "test-connectivity", false, "After a successful deploy, run the connectivity matrix (apply example DaemonSet → wait Ready → RDMA matrix → cleanup) to verify the data plane end-to-end.")
 
 	setFlagGroup(deployCmd, "kubeconfig", GroupCommon)
 	setFlagGroup(deployCmd, "deployment-files", GroupGeneration)
 	setFlagGroup(deployCmd, "deploy-timeout", GroupDeploy)
 	setFlagGroup(deployCmd, "dry-run", GroupDeploy)
-	setFlagGroup(deployCmd, "verify", GroupDeploy)
+	setFlagGroup(deployCmd, "test-connectivity", GroupDeploy)
 }
diff --git a/pkg/cmd/validate.go b/pkg/cmd/validate.go
index 9d69675..fb78647 100644
--- a/pkg/cmd/validate.go
+++ b/pkg/cmd/validate.go
@@ -41,26 +41,26 @@ import (
 	"github.com/nvidia/k8s-launch-kit/pkg/networkoperatorplugin"
 	"github.com/nvidia/k8s-launch-kit/pkg/networkoperatorplugin/connectivity"
 	"github.com/nvidia/k8s-launch-kit/pkg/networkoperatorplugin/crstate"
+	"github.com/nvidia/k8s-launch-kit/pkg/presetmatch"
+	"github.com/nvidia/k8s-launch-kit/pkg/presets"
 	"github.com/nvidia/k8s-launch-kit/pkg/ui"
 )
 
 // Phase 2 connectivity-test flags. `--connectivity` defaults to ON —
 // every `l8k validate` verifies the data plane (apply example DS,
-// wait Ready, ping matrix, cleanup) unless the caller passes
+// wait Ready, RDMA matrix, cleanup) unless the caller passes
 // `--connectivity=false`. The other flags tune matrix behaviour
-// (--ping-count, --connectivity-timeout, --keep) or extend the
-// validate semantics (--wait blocks until in-progress manifests reach
-// a terminal state).
+// (--connectivity-timeout, --keep) or extend the validate semantics
+// (--wait blocks until in-progress manifests reach a terminal state).
 //
-// Phase 3 adds --report-path: emit an HTML verify-report alongside
+// Phase 3 adds --report-path: emit an HTML validation report alongside
 // the text/JSON output. Empty default means "auto-place at
-// <deployment-files>/verify-report.html"; the literal "-" disables
-// report writing.
+// <deployment-files>/k8s-launch-kit-validation-report.html"; the
+// literal "-" disables report writing.
 var (
 	validateConnectivity        bool
 	validateKeep                bool
 	validateConnectivityTimeout time.Duration
-	validatePingCount           int
 	validateWait                time.Duration
 	validateReportPath          string
 )
@@ -130,6 +130,7 @@ connectivity-matrix failure.`,
 			matrix           *connectivity.MatrixResult
 			warnings         []string
 			presetDeviations []groupDeviationReport
+			presetResults    []presetmatch.Result
 			reportClient     ctrlclient.Client
 			reportRestConfig *rest.Config
 			reportManifestDir string
@@ -145,10 +146,19 @@ connectivity-matrix failure.`,
 			if err != nil {
 				warnings = append(warnings, err.Error())
 			}
+			// Compute the overall verdict from whatever inputs we
+			// have at this exit point — verdict may not have been
+			// computed yet on an early-error path, so build a
+			// pessimistic FAIL with the error as the reason.
+			overall := connectivity.OverallVerdict{Pass: false}
+			if err != nil {
+				overall.Reasons = append(overall.Reasons, err.Error())
+			}
+			emitVerdictBanner(overall, outputFormat)
 			writeHTMLReportIfWanted(context.Background(), reportClient, reportRestConfig,
 				reportManifestDir, deploymentFiles,
 				operatorNamespace, versionCheck, componentCheck, results, &matrix, &warnings,
-				userConfigPath(), outputFormat)
+				presetResults, overall, userConfigPath(), outputFormat)
 			exitWithError(err, outputFormat)
 		}
 
@@ -192,6 +202,14 @@ connectivity-matrix failure.`,
 						Deviations:  g.PresetDeviation,
 					})
 				}
+				// Re-run preset matching at validate-time. This
+				// catches drift between the cluster-config.yaml's
+				// stored hardware view and the certified preset
+				// even when discover wasn't re-run. Results stay
+				// informational — a deviation doesn't fail
+				// validate (matches the historical behaviour of
+				// presetDeviation), so the exit code is unchanged.
+				presetResults = presetmatch.MatchAll(cfg)
 			} else if err != nil {
 				log.Log.V(1).Info("user-config not loaded; version check will be skipped",
 					"path", path, "error", err.Error())
@@ -259,6 +277,7 @@ connectivity-matrix failure.`,
 
 		verdict := emitValidationReport(versionCheck, results, presetDeviations, outputFormat)
 		emitComponentVersionReport(componentCheck, outputFormat)
+		emitPresetMatchReport(presetResults, outputFormat)
 		warnings = append(warnings, collectVerdictWarnings(verdict)...)
 		if componentCheck != nil && !componentCheck.Skipped && !componentCheck.AllMatch {
 			warnings = append(warnings, "Component versions in NicClusterPolicy / NicNodePolicy diverge from the selectedRelease catalog — see the report's components section.")
@@ -269,18 +288,23 @@ connectivity-matrix failure.`,
 		// connectivity failure) since Go's defer doesn't run on
 		// os.Exit and exitWithError does os.Exit.
 		emitReport := func() {
+			overall := computeOverallVerdict(verdict, componentCheck, matrix, presetResults)
+			emitVerdictBanner(overall, outputFormat)
 			writeHTMLReportIfWanted(ctx, k8sClient, restConfig, manifestDir, deploymentFiles,
 				operatorNamespace, versionCheck, componentCheck, results, &matrix, &warnings,
-				userConfigPath(), outputFormat)
+				presetResults, overall, userConfigPath(), outputFormat)
 		}
 
-		// `--connectivity` runs the data-plane ping matrix only when
-		// every CR is reconciled — otherwise we'd just produce noise.
+		// `--connectivity` runs the data-plane matrix when no manifest
+		// is broken (error or missing). Version mismatches (Helm
+		// release or per-component) are *not* fatal here — the cluster
+		// is still up, so the connectivity tests are still meaningful;
+		// the final verdict picks up the mismatch as a fail reason.
 		// In-progress (without errors) prints a warning and exits 0
 		// so CI/operators can re-run later.
 		componentMismatch := componentCheck != nil && !componentCheck.Skipped && !componentCheck.AllMatch
 		switch {
-		case verdict.HasError || verdict.HasMissing || !verdict.VersionOK || componentMismatch:
+		case verdict.HasError || verdict.HasMissing:
 			emitReport()
 			os.Exit(apperrors.ExitDeployment)
 		case verdict.HasInProgress:
@@ -292,6 +316,9 @@ connectivity-matrix failure.`,
 			}
 			warnings = append(warnings, "Connectivity matrix skipped — cluster has in-progress manifests.")
 			emitReport()
+			if !verdict.VersionOK || componentMismatch {
+				os.Exit(apperrors.ExitDeployment)
+			}
 			return
 		}
 
@@ -301,7 +328,6 @@ connectivity-matrix failure.`,
 			m, err := connectivity.RunMatrix(ctxWithUI, k8sClient, restConfig, uiOutput, connectivity.Options{
 				ManifestDir: manifestDir,
 				Timeout:     validateConnectivityTimeout,
-				PingCount:   validatePingCount,
 				Keep:        validateKeep,
 			})
 			matrix = m
@@ -320,16 +346,187 @@ connectivity-matrix failure.`,
 			if matrix != nil && matrix.Skipped != nil {
 				warnings = append(warnings, "Connectivity matrix skipped: "+matrix.Skipped.Reason)
 			}
-			if matrix != nil && (matrix.Summary.Failed > 0 || matrix.Summary.ExecErrors > 0) {
-				emitReport()
-				os.Exit(apperrors.ExitDeployment)
-			}
 		}
 
 		emitReport()
+		// Final exit code unifies every gating signal: matrix failures,
+		// version/component mismatches, AND preset deviations all fail
+		// the verdict, but the matrix still gets a chance to run when
+		// the only problem is a stale Helm release / catalog mismatch
+		// / hardware drift from the catalog preset.
+		matrixFailed := matrix != nil && matrix.Summary.Failed > 0
+		if matrixFailed || !verdict.VersionOK || componentMismatch || hasPresetDeviation(presetResults) {
+			os.Exit(apperrors.ExitDeployment)
+		}
 	},
 }
 
+// hasPresetDeviation reports whether any group deviated from its
+// matched preset. Drives the non-zero exit code in addition to the
+// PASS/FAIL banner.
+func hasPresetDeviation(results []presetmatch.Result) bool {
+	for _, r := range results {
+		if r.Status == presetmatch.StatusDeviation {
+			return true
+		}
+	}
+	return false
+}
+
+// computeOverallVerdict folds every input from the validate run into
+// a single pass/fail outcome — the same logic used to decide the
+// process exit code, expressed as structured Reasons / Notes so the
+// banner can list what failed (and what was merely informational).
+func computeOverallVerdict(
+	verdict validationVerdict,
+	componentCheck *networkoperatorplugin.ComponentVersionCheck,
+	matrix *connectivity.MatrixResult,
+	presetResults []presetmatch.Result,
+) connectivity.OverallVerdict {
+	out := connectivity.OverallVerdict{Pass: true}
+	if !verdict.VersionOK {
+		out.Pass = false
+		out.Reasons = append(out.Reasons, "Network Operator Helm release version does not match the selectedRelease in cluster-config.yaml")
+	}
+	if verdict.HasMissing {
+		out.Pass = false
+		out.Reasons = append(out.Reasons, fmt.Sprintf("%d manifest(s) not found in the cluster — `l8k deploy` not run or partial", verdict.MissingCount))
+	}
+	if verdict.HasError {
+		out.Pass = false
+		out.Reasons = append(out.Reasons, fmt.Sprintf("%d manifest(s) reported an error state", verdict.ErrorCount))
+	}
+	if componentCheck != nil && !componentCheck.Skipped && !componentCheck.AllMatch {
+		mismatches := 0
+		for _, c := range componentCheck.Components {
+			if !c.Match {
+				mismatches++
+			}
+		}
+		out.Pass = false
+		out.Reasons = append(out.Reasons, fmt.Sprintf("%d component version(s) in NicClusterPolicy/NicNodePolicy diverge from the selectedRelease catalog", mismatches))
+	}
+	if matrix != nil {
+		if matrix.Summary.Failed > 0 {
+			out.Pass = false
+			out.Reasons = append(out.Reasons, fmt.Sprintf("%d RDMA test(s) failed in the connectivity matrix", matrix.Summary.Failed))
+		}
+		if matrix.Skipped != nil {
+			out.Notes = append(out.Notes, "Connectivity matrix skipped: "+matrix.Skipped.Reason)
+		}
+	}
+	if verdict.HasInProgress {
+		out.Notes = append(out.Notes, fmt.Sprintf("%d manifest(s) still reconciling — re-run later or use --wait to block (does not gate the verdict)", verdict.InProgressCount))
+	}
+	// Platform topology mismatches fail the verdict but do NOT block
+	// the other stages (same pattern as version mismatch): when the
+	// discovered hardware drifts from the certified topology for the
+	// server type, it's a real validation failure, but the data
+	// plane is still testable. The HTML report's Node groups section
+	// shows the in-place diff per PF row — the banner just names the
+	// affected server type so operators know where to look.
+	for _, r := range presetResults {
+		if r.Status != presetmatch.StatusDeviation {
+			continue
+		}
+		out.Pass = false
+		out.Reasons = append(out.Reasons,
+			fmt.Sprintf("The detected platform topology does not match the certified topology for %s server type (see Node groups section for the per-device diff)",
+				platformLabel(r)))
+	}
+	return out
+}
+
+// platformLabel renders the server-type identifier used in user
+// messages as `<manufacturer>-<machineType>-<gpuType>`, dropping any
+// empty segments. The manufacturer is the leading segment when the
+// matched preset's topology.yaml declares it (e.g. "Dell-PowerEdge-
+// XE9680-H200"); on a not-found match it's omitted because we have
+// no manufacturer signal outside the catalog. Falls back to the
+// group identifier if nothing useful is set.
+func platformLabel(r presetmatch.Result) string {
+	parts := make([]string, 0, 3)
+	if r.Manufacturer != "" {
+		parts = append(parts, r.Manufacturer)
+	}
+	if r.MachineType != "" {
+		parts = append(parts, r.MachineType)
+	}
+	if r.GPUType != "" {
+		parts = append(parts, r.GPUType)
+	}
+	if len(parts) == 0 {
+		return r.Group
+	}
+	return strings.Join(parts, "-")
+}
+
+
+// emitVerdictBanner prints the PASS/FAIL banner at the top of text
+// output (above the per-check details that follow). JSON mode skips
+// this — the structured Verdict field travels with the report data.
+func emitVerdictBanner(v connectivity.OverallVerdict, format string) {
+	if format == "json" {
+		return
+	}
+	if v.Pass {
+		fmt.Println("\n══════════════════════════════════════════════════════════")
+		fmt.Println("                  ✓ VALIDATION PASSED")
+		fmt.Println("══════════════════════════════════════════════════════════")
+	} else {
+		fmt.Println("\n══════════════════════════════════════════════════════════")
+		fmt.Println("                  ✗ VALIDATION FAILED")
+		fmt.Println("══════════════════════════════════════════════════════════")
+		for _, r := range v.Reasons {
+			fmt.Printf("  • %s\n", r)
+		}
+	}
+	for _, n := range v.Notes {
+		fmt.Printf("  ⓘ %s\n", n)
+	}
+}
+
+// emitPresetMatchReport prints the per-group platform-topology check
+// in text mode. JSON mode skips this — the structured field rides
+// downstream on the HTML/JSON envelope.
+func emitPresetMatchReport(results []presetmatch.Result, format string) {
+	if len(results) == 0 || format == "json" {
+		return
+	}
+	fmt.Println()
+	fmt.Println("Platform topology validation")
+	for _, r := range results {
+		var status string
+		switch r.Status {
+		case presetmatch.StatusMatch:
+			status = "MATCH        "
+		case presetmatch.StatusDeviation:
+			status = "MISMATCH     "
+		case presetmatch.StatusNotFound:
+			status = "NOT CERTIFIED"
+		case presetmatch.StatusSkipped:
+			status = "SKIPPED      "
+		default:
+			status = "UNKNOWN      "
+		}
+		label := r.Group
+		if r.MachineType != "" || r.GPUType != "" {
+			label = fmt.Sprintf("%s (%s/%s)", r.Group, r.MachineType, r.GPUType)
+		}
+		detail := r.Reason
+		switch r.Status {
+		case presetmatch.StatusMatch:
+			detail = fmt.Sprintf("matches certified topology for %s server type", platformLabel(r))
+		case presetmatch.StatusDeviation:
+			detail = fmt.Sprintf("does not match certified topology for %s server type · %s",
+				platformLabel(r), r.Reason)
+		case presetmatch.StatusNotFound:
+			detail = fmt.Sprintf("no certified topology available for %s server type", platformLabel(r))
+		}
+		fmt.Printf("  [%s] %s — %s\n", status, label, detail)
+	}
+}
+
 // emitComponentVersionReport prints the component-version cross-check
 // in text mode (the JSON consumer reads the structured field added by
 // the HTML/JSON writers downstream). Skipped checks are surfaced as a
@@ -402,7 +599,7 @@ func resolveReportPath(flag, deploymentDir string) string {
 	case "-":
 		return ""
 	case "":
-		return filepath.Join(deploymentDir, "verify-report.html")
+		return filepath.Join(deploymentDir, "k8s-launch-kit-validation-report.html")
 	default:
 		return flag
 	}
@@ -424,6 +621,8 @@ func writeHTMLReportIfWanted(
 	results []networkoperatorplugin.ValidationResult,
 	matrix **connectivity.MatrixResult,
 	warnings *[]string,
+	presetResults []presetmatch.Result,
+	overall connectivity.OverallVerdict,
 	userCfgPath string,
 	outputFormat string,
 ) {
@@ -433,6 +632,7 @@ func writeHTMLReportIfWanted(
 	}
 
 	data := connectivity.ReportData{
+		Verdict: overall,
 		Cluster: connectivity.ClusterInfo{
 			L8kVersion:        Version,
 			GeneratedAt:       time.Now().UTC(),
@@ -441,10 +641,11 @@ func writeHTMLReportIfWanted(
 			OperatorNamespace: operatorNamespace,
 		},
 		Profile:         loadProfileInfo(userCfgPath, manifestDir),
-		NodeGroups:      loadNodeGroups(userCfgPath),
+		NodeGroups:      loadNodeGroups(userCfgPath, presetResults),
 		Nodes:           listNodesForReport(ctx, c),
 		Release:         versionCheck,
 		ComponentCheck:  componentCheck,
+		PresetMatches:   presetResults,
 		Manifests:       results,
 		Matrix:          *matrix,
 		Warnings:        *warnings,
@@ -672,7 +873,17 @@ func splitYAMLDocs(s string) []string {
 // loadNodeGroups projects every cluster-config.yaml `clusterConfig[]`
 // entry into a NodeGroupInfo for the report. Empty result when the
 // config wasn't found / parsed — the section just renders empty.
-func loadNodeGroups(userCfgPath string) []connectivity.NodeGroupInfo {
+//
+// presetResults carries the runtime-fresh per-group preset match
+// (computed by presetmatch.MatchAll at validate-time). Its
+// Deviations are preferred over the snapshot stored in
+// `cluster-config.yaml.clusterConfig[].presetDeviation`, which only
+// gets populated by `l8k discover` and goes stale as soon as the
+// catalog changes or the hardware is swapped. Falling back to the
+// stored snapshot when the runtime entry is absent keeps the older
+// flow working (e.g. validate run without l8k discover access to
+// the cluster).
+func loadNodeGroups(userCfgPath string, presetResults []presetmatch.Result) []connectivity.NodeGroupInfo {
 	if userCfgPath == "" {
 		return nil
 	}
@@ -680,6 +891,20 @@ func loadNodeGroups(userCfgPath string) []connectivity.NodeGroupInfo {
 	if err != nil || cfg == nil {
 		return nil
 	}
+	// Build a per-group index of fresh match results so the
+	// PF-table markers reflect what the banner is currently
+	// complaining about. The full Result is indexed (not just
+	// Deviations) because the marker pass also needs Preset.PFs
+	// to enrich "missing PCI" rows with the expected
+	// deviceID / rail / netdev that the certified topology
+	// declares for those addresses.
+	freshResultsByGroup := map[string]*presetmatch.Result{}
+	for i := range presetResults {
+		r := &presetResults[i]
+		if r.Status == presetmatch.StatusDeviation {
+			freshResultsByGroup[r.Group] = r
+		}
+	}
 	out := make([]connectivity.NodeGroupInfo, 0, len(cfg.ClusterConfig))
 	for _, g := range cfg.ClusterConfig {
 		ng := connectivity.NodeGroupInfo{
@@ -696,7 +921,19 @@ func loadNodeGroups(userCfgPath string) []connectivity.NodeGroupInfo {
 			ng.RdmaCapable = g.Capabilities.Nodes.Rdma
 			ng.IbCapable = g.Capabilities.Nodes.Ib
 		}
-		for _, d := range g.PresetDeviation {
+		// Pick the runtime-fresh result when available, falling back
+		// to whatever `l8k discover` stamped into cluster-config.
+		// The runtime path also carries the matched Preset (used to
+		// enrich missing-PCI rows below); the fallback only has
+		// stored deviations so missing rows there can't be enriched.
+		freshResult := freshResultsByGroup[g.Identifier]
+		var deviations []config.PresetDeviationEntry
+		if freshResult != nil {
+			deviations = freshResult.Deviations
+		} else {
+			deviations = g.PresetDeviation
+		}
+		for _, d := range deviations {
 			ng.PresetDeviations = append(ng.PresetDeviations, connectivity.PresetDeviation{
 				Field: d.Field, Expected: d.Expected, Got: d.Got, Detail: d.Detail,
 			})
@@ -731,11 +968,149 @@ func loadNodeGroups(userCfgPath string) []connectivity.NodeGroupInfo {
 				ng.EastWestPFs = append(ng.EastWestPFs, row)
 			}
 		}
+		applyPlatformTopologyMarkers(&ng, deviations, freshResult)
 		out = append(out, ng)
 	}
 	return out
 }
 
+// applyPlatformTopologyMarkers populates the report's two paired
+// PF views (Actual = discovered hardware, Expected = certified
+// topology) and flags mismatched rows in each.
+//
+//   - Actual row is Mismatched when its (PCI, deviceID) pair has no
+//     matching entry in the preset.
+//   - Expected row is Mismatched when its (PCI, deviceID) pair has
+//     no matching entry on the cluster.
+//   - PFCountMismatch is set from the pfCount deviation.
+//
+// matchResult may be nil when we're falling back to stored
+// cluster-config deviations (no preset object available); in that
+// case only Mismatched markers based on raw deviations are applied
+// and the Expected tables stay empty.
+func applyPlatformTopologyMarkers(ng *connectivity.NodeGroupInfo, deviations []config.PresetDeviationEntry, matchResult *presetmatch.Result) {
+	for _, d := range deviations {
+		if d.Field == "pfCount" {
+			var got, want int
+			_, _ = fmt.Sscanf(d.Got, "%d", &got)
+			_, _ = fmt.Sscanf(d.Expected, "%d", &want)
+			ng.PFCountMismatch = &connectivity.PFCountMismatch{Expected: want, Got: got}
+		}
+	}
+
+	if matchResult == nil || matchResult.Preset == nil {
+		// No live preset to compare against — fall back to flagging
+		// Actual rows whose PCI appears in any pciAddress/deviceID
+		// deviation from the stored snapshot. Expected tables stay
+		// empty.
+		flagged := map[string]bool{}
+		for _, d := range deviations {
+			switch d.Field {
+			case "deviceID":
+				if _, pci := splitDeviceIDAtPCI(d.Got); pci != "" {
+					flagged[pci] = true
+				}
+			case "pciAddress":
+				if d.Got != "" {
+					flagged[d.Got] = true
+				}
+			}
+		}
+		markActualMismatched(ng.EastWestPFs, flagged)
+		markActualMismatched(ng.NorthSouthPFs, flagged)
+		return
+	}
+
+	// Index actuals by PCI so the Expected pass can quickly check
+	// whether a preset PF has a matching cluster PF (same PCI +
+	// same deviceID).
+	type actualEntry struct{ deviceID string }
+	actualByPCI := map[string]actualEntry{}
+	for _, pf := range ng.EastWestPFs {
+		actualByPCI[pf.PciAddress] = actualEntry{deviceID: pf.DeviceID}
+	}
+	for _, pf := range ng.NorthSouthPFs {
+		actualByPCI[pf.PciAddress] = actualEntry{deviceID: pf.DeviceID}
+	}
+	// Index preset by PCI for the Actual pass.
+	presetByPCI := map[string]presets.PresetPF{}
+	for _, pf := range matchResult.Preset.PFs {
+		presetByPCI[pf.PciAddress] = pf
+	}
+
+	// Actual: flag rows whose PCI isn't in the preset, or whose
+	// deviceID at that PCI doesn't match the preset.
+	for i := range ng.EastWestPFs {
+		pf := &ng.EastWestPFs[i]
+		expected, ok := presetByPCI[pf.PciAddress]
+		if !ok || expected.DeviceID != pf.DeviceID {
+			pf.Mismatched = true
+		}
+	}
+	for i := range ng.NorthSouthPFs {
+		pf := &ng.NorthSouthPFs[i]
+		expected, ok := presetByPCI[pf.PciAddress]
+		if !ok || expected.DeviceID != pf.DeviceID {
+			pf.Mismatched = true
+		}
+	}
+
+	// Expected: project the preset's PFs into the same PFInfo
+	// shape, bucketed by traffic type. Flag rows whose (PCI,
+	// deviceID) pair isn't present on the cluster.
+	for _, pf := range matchResult.Preset.PFs {
+		row := connectivity.PFInfo{
+			PciAddress:       pf.PciAddress,
+			DeviceID:         pf.DeviceID,
+			Rail:             "—",
+			Traffic:          pf.Traffic,
+			NetworkInterface: pf.NetworkInterface,
+			RdmaDevice:       pf.RdmaDevice,
+			PSID:             pf.PSID,
+			PartNumber:       pf.PartNumber,
+			ConnectedGPU:     pf.ConnectedGPU,
+			GPUProximity:     pf.GPUProximity,
+		}
+		if pf.Rail != nil {
+			row.Rail = fmt.Sprintf("%d", *pf.Rail)
+		}
+		if pf.NumaNode != nil {
+			row.NumaNode = fmt.Sprintf("%d", *pf.NumaNode)
+		}
+		if actual, ok := actualByPCI[pf.PciAddress]; !ok || actual.deviceID != pf.DeviceID {
+			row.Mismatched = true
+		}
+		switch pf.Traffic {
+		case "north-south":
+			ng.ExpectedNorthSouthPFs = append(ng.ExpectedNorthSouthPFs, row)
+		default:
+			ng.ExpectedEastWestPFs = append(ng.ExpectedEastWestPFs, row)
+		}
+	}
+}
+
+// markActualMismatched flags any Actual PFInfo whose PCI is in the
+// flagged set. Used as the fallback path when we don't have a live
+// preset object to drive the full Actual/Expected pairing.
+func markActualMismatched(pfs []connectivity.PFInfo, flagged map[string]bool) {
+	for i := range pfs {
+		if flagged[pfs[i].PciAddress] {
+			pfs[i].Mismatched = true
+		}
+	}
+}
+
+// splitDeviceIDAtPCI splits a "<deviceID>@<pciAddr>" composite (the
+// format ValidatePreset emits for deviceID deviations) back into its
+// two parts. Returns ("", "") on malformed input.
+func splitDeviceIDAtPCI(s string) (deviceID, pci string) {
+	at := strings.IndexByte(s, '@')
+	if at < 0 {
+		return "", ""
+	}
+	return s[:at], s[at+1:]
+}
+
 // listNodesForReport pulls the cluster's node list and projects each
 // into a NodeInfo for the report. Reads l8k's machine/gpu labels
 // (config.MachineLabelKey / config.GPULabelKey) plus a best-effort
@@ -1007,13 +1382,13 @@ func emitValidationReport(vc *networkoperatorplugin.VersionCheck, results []netw
 
 	if len(presetDeviations) > 0 {
 		fmt.Println()
-		fmt.Println("Preset deviations (cluster differs from matched preset)")
+		fmt.Println("Platform topology mismatches (detected hardware differs from the certified topology)")
 		for _, gd := range presetDeviations {
 			label := gd.Group
 			if gd.MachineType != "" || gd.GPUType != "" {
 				label = fmt.Sprintf("%s (%s/%s)", gd.Group, gd.MachineType, gd.GPUType)
 			}
-			fmt.Printf("  %s — %d deviation(s):\n", label, len(gd.Deviations))
+			fmt.Printf("  %s — %d mismatch(es):\n", label, len(gd.Deviations))
 			for _, d := range gd.Deviations {
 				expected := d.Expected
 				if expected == "" {
@@ -1029,7 +1404,7 @@ func emitValidationReport(vc *networkoperatorplugin.VersionCheck, results []netw
 	}
 
 	fmt.Println()
-	fmt.Printf("Summary: %d/%d ready, %d in-progress, %d error, %d missing; version: %s; preset deviations: %d group(s)\n",
+	fmt.Printf("Summary: %d/%d ready, %d in-progress, %d error, %d missing; version: %s; topology mismatches: %d group(s)\n",
 		verdict.SuccessCount, verdict.Total,
 		verdict.InProgressCount, verdict.ErrorCount, verdict.MissingCount,
 		versionStatusText(vc), len(presetDeviations))
@@ -1081,12 +1456,11 @@ func init() {
 	// `l8k validate` exercises the data plane unless explicitly
 	// disabled. Pass `--connectivity=false` to limit validate to
 	// the static manifest-presence + Helm release-version checks.
-	validateCmd.Flags().BoolVar(&validateConnectivity, "connectivity", true, "Run a ping matrix between pods of the example DaemonSet to verify the data plane. Default true. Pass --connectivity=false to skip when only the static manifest checks are wanted.")
+	validateCmd.Flags().BoolVar(&validateConnectivity, "connectivity", true, "Run an RDMA matrix (rping + ib_write_bw) between pods of the example DaemonSet to verify the data plane. Default true. Pass --connectivity=false to skip when only the static manifest checks are wanted.")
 	validateCmd.Flags().BoolVar(&validateKeep, "keep", false, "Leave the example DaemonSet running after --connectivity completes (useful for debugging).")
-	validateCmd.Flags().DurationVar(&validateConnectivityTimeout, "connectivity-timeout", 5*time.Minute, "Wall-clock budget for the connectivity matrix (DaemonSet rollout + ping execs).")
-	validateCmd.Flags().IntVar(&validatePingCount, "ping-count", 3, "Number of ICMP echoes per src→dst pair when running --connectivity (ping -c N).")
+	validateCmd.Flags().DurationVar(&validateConnectivityTimeout, "connectivity-timeout", 5*time.Minute, "Wall-clock budget for the connectivity matrix (DaemonSet rollout + rping + ib_write_bw execs).")
 	validateCmd.Flags().DurationVar(&validateWait, "wait", 0, "Block validate up to this duration waiting for in-progress manifests to reach a terminal state. 0 (default) returns immediately on the first snapshot.")
-	validateCmd.Flags().StringVar(&validateReportPath, "report-path", "", "Write an HTML verify-report to this path. When empty (default), writes to <deployment-files>/verify-report.html. Pass '-' to skip the report file entirely.")
+	validateCmd.Flags().StringVar(&validateReportPath, "report-path", "", "Write the HTML validation report to this path. When empty (default), writes to <deployment-files>/k8s-launch-kit-validation-report.html. Pass '-' to skip the report file entirely.")
 
 	setFlagGroup(validateCmd, "kubeconfig", GroupCommon)
 	setFlagGroup(validateCmd, "user-config", GroupCommon)
diff --git a/pkg/networkoperatorplugin/connectivity/connectivity.go b/pkg/networkoperatorplugin/connectivity/connectivity.go
index 8a1339c..39931b8 100644
--- a/pkg/networkoperatorplugin/connectivity/connectivity.go
+++ b/pkg/networkoperatorplugin/connectivity/connectivity.go
@@ -21,7 +21,6 @@ import (
 	"fmt"
 	"sort"
 	"strings"
-	"sync"
 	"time"
 
 	"github.com/nvidia/k8s-launch-kit/pkg/ui"
@@ -39,18 +38,11 @@ type Options struct {
 	// validated).
 	ManifestDir string
 	// Timeout caps the whole connectivity phase: apply + DS rollout
-	// + ping execs + cleanup. 0 falls back to 5 minutes.
+	// + RDMA test execs + cleanup. 0 falls back to 5 minutes.
 	Timeout time.Duration
-	// PingCount is the number of ICMP echoes per src→dst pair
-	// (`ping -c N`). 0 falls back to 3.
-	PingCount int
 	// Keep leaves the test DaemonSets running after the matrix
 	// completes, for follow-up debugging. Default is to delete.
 	Keep bool
-	// MaxConcurrentPings caps the number of in-flight `ping` execs.
-	// 0 falls back to 16 — enough to keep a 4-rail × 16-pod matrix
-	// busy without overwhelming the apiserver.
-	MaxConcurrentPings int
 }
 
 // MatrixResult is the aggregate output of one connectivity run. It's
@@ -59,9 +51,8 @@ type MatrixResult struct {
 	// DaemonSets is one entry per applied example DS — typically
 	// one per merged group.
 	DaemonSets []DaemonSetReport
-	// PingResults is the flat list of every executed ping test.
-	// SameRail and CrossRail are derived views (filtered by Kind)
-	// for the report layer.
+	// PingResults is the flat list of every executed RDMA test
+	// (rping + ib_write_bw, same-rail + cross-rail).
 	PingResults []PingResult
 	// Skipped is non-nil when fewer than 2 schedulable test pods
 	// were available across all DaemonSets. The matrix is treated
@@ -72,11 +63,12 @@ type MatrixResult struct {
 }
 
 // MatrixSummary is the rolled-up counts validate prints under "summary".
+// Passed counts tests that exited cleanly; Failed counts tests that
+// ran but exited non-zero or produced unparseable output.
 type MatrixSummary struct {
 	TotalTests int
 	Passed     int
 	Failed     int
-	ExecErrors int
 }
 
 // DaemonSetReport captures one DS's rollout state and the test pods it
@@ -94,10 +86,14 @@ type DaemonSetReport struct {
 //  3. Waits for the DS rollout (desired > 0 AND ready == desired).
 //  4. Lists Running+Ready pods, parses each pod's multus annotation,
 //     filters to secondary networks, picks the first IPv4 per rail.
-//  5. Builds a test plan via Plan() — same-rail across pods + per-pod
-//     cross-rail canary; soft-skip if <2 schedulable pods.
-//  6. Runs every ping in parallel up to opts.MaxConcurrentPings.
-//  7. Deletes the DaemonSets unless opts.Keep.
+//  5. Discovers the RDMA device backing each multus interface per pod.
+//  6. Builds a test plan via Plan() — same-rail rping + ib_write_bw
+//     across pods plus per-pair cross-rail canaries; soft-skip if <2
+//     schedulable pods.
+//  7. Runs rping then ib_write_bw stages sequentially per the
+//     "spawn fresh server per test" lifecycle (concurrent runs would
+//     fight over the listener port).
+//  8. Deletes the DaemonSets unless opts.Keep.
 //
 // All UI output flows through `uiOutput` (caller passes
 // ui.FromContext(ctx)). Logs go to controller-runtime's logr.
@@ -105,12 +101,6 @@ func RunMatrix(ctx context.Context, c client.Client, restConfig *rest.Config, ui
 	if opts.Timeout <= 0 {
 		opts.Timeout = 5 * time.Minute
 	}
-	if opts.PingCount <= 0 {
-		opts.PingCount = 3
-	}
-	if opts.MaxConcurrentPings <= 0 {
-		opts.MaxConcurrentPings = 16
-	}
 
 	ctx, cancel := context.WithTimeout(ctx, opts.Timeout)
 	defer cancel()
@@ -187,10 +177,14 @@ func RunMatrix(ctx context.Context, c client.Client, restConfig *rest.Config, ui
 		}
 	}
 
-	// Parse multus annotations and build the TestPod list.
+	// Parse multus annotations and build the TestPod list. Each
+	// pod's RDMA-device-by-rail map is filled in from a single
+	// in-pod shell exec that reads
+	// /sys/class/net/<iface>/device/infiniband/ per multus iface,
+	// so the rping + ib_write_bw stages can pass `-d <dev>`.
 	testPods := make([]TestPod, 0, len(allPods))
 	for _, p := range allPods {
-		tp, err := buildTestPod(p.pod)
+		tp, ifaceByRail, err := buildTestPod(p.pod)
 		if err != nil {
 			uiOutput.Warning("Pod %s/%s: %v — excluded from matrix", p.pod.Namespace, p.pod.Name, err)
 			log.Log.V(1).Info("buildTestPod failed", "pod", p.pod.Name, "error", err.Error())
@@ -200,6 +194,10 @@ func RunMatrix(ctx context.Context, c client.Client, restConfig *rest.Config, ui
 			uiOutput.Warning("Pod %s/%s has no secondary network IPs — excluded from matrix", p.pod.Namespace, p.pod.Name)
 			continue
 		}
+		tp.RDMADevsByRail = DiscoverRDMADevices(ctx, restConfig, p.pod.Namespace, p.pod.Name, p.ref.Container, ifaceByRail)
+		log.Log.V(1).Info("RDMA device discovery",
+			"pod", p.pod.Name, "rails", tp.RailOrder,
+			"rdmaDevsByRail", tp.RDMADevsByRail)
 		testPods = append(testPods, tp)
 	}
 
@@ -210,56 +208,74 @@ func RunMatrix(ctx context.Context, c client.Client, restConfig *rest.Config, ui
 		return result, nil
 	}
 
-	uiOutput.Info("Plan: %d same-rail tests, %d cross-rail canary tests", len(plan.SameRail), len(plan.CrossRail))
+	totalSameRail := len(plan.RDMASameRail)
+	totalCrossRail := len(plan.RDMACrossRail)
+	uiOutput.Info("Plan: %d same-rail rping + %d cross-rail rping; %d same-rail ib_write_bw + %d cross-rail ib_write_bw",
+		totalSameRail, totalCrossRail, len(plan.RDMABwSameRail), len(plan.RDMABwCrossRail))
 
-	// Build a pod-name → container name map so RunPing knows which
-	// container to exec in (test DSes have a single container today,
-	// but the orchestrator stays general).
+	// Build a pod-name → container name map so the test runners
+	// know which container to exec in (test DSes have a single
+	// container today, but the orchestrator stays general).
 	containerByPod := map[string]string{}
 	namespaceByPod := map[string]string{}
 	for _, p := range allPods {
-		// We use the DS's resolved container name for every pod it
-		// owns — pods of the same DS share the template.
 		containerByPod[p.pod.Name] = p.ref.Container
 		namespaceByPod[p.pod.Name] = p.pod.Namespace
 	}
 
-	tests := append([]PingTest{}, plan.SameRail...)
-	tests = append(tests, plan.CrossRail...)
-	result.PingResults = make([]PingResult, len(tests))
-
-	// Bounded concurrency: a semaphore of size MaxConcurrentPings.
-	sem := make(chan struct{}, opts.MaxConcurrentPings)
-	var wg sync.WaitGroup
-	for i := range tests {
-		wg.Add(1)
-		sem <- struct{}{}
-		go func(idx int, t PingTest) {
-			defer wg.Done()
-			defer func() { <-sem }()
-			ns := namespaceByPod[t.SrcPod]
-			cn := containerByPod[t.SrcPod]
-			if ns == "" || cn == "" {
-				result.PingResults[idx] = PingResult{
+	// Two stages, each running sequentially per the "spawn fresh
+	// server per test" decision: every test starts its own server,
+	// runs the client, kills the server. Concurrent runs would
+	// fight over the listener port.
+	stages := []struct {
+		label string
+		tests []PingTest
+		run   func(t PingTest) PingResult
+	}{
+		{
+			label: "RDMA ping (rping)",
+			tests: append(append([]PingTest{}, plan.RDMASameRail...), plan.RDMACrossRail...),
+			run: func(t PingTest) PingResult {
+				return RunRPing(ctx, restConfig, namespaceByPod[t.DstPod],
+					t.DstPod, containerByPod[t.DstPod], // server side
+					t.SrcPod, containerByPod[t.SrcPod], // client side
+					t, 5)
+			},
+		},
+		{
+			label: "RDMA bandwidth (ib_write_bw)",
+			tests: append(append([]PingTest{}, plan.RDMABwSameRail...), plan.RDMABwCrossRail...),
+			run: func(t PingTest) PingResult {
+				return RunIbWriteBw(ctx, restConfig, namespaceByPod[t.DstPod],
+					t.DstPod, containerByPod[t.DstPod],
+					t.SrcPod, containerByPod[t.SrcPod],
+					t, 0)
+			},
+		},
+	}
+	for _, stage := range stages {
+		if len(stage.tests) == 0 {
+			continue
+		}
+		uiOutput.Info("Stage: %s — %d test(s)", stage.label, len(stage.tests))
+		for _, t := range stage.tests {
+			if namespaceByPod[t.SrcPod] == "" || namespaceByPod[t.DstPod] == "" {
+				result.PingResults = append(result.PingResults, PingResult{
 					Test: t,
-					Err:  fmt.Errorf("no namespace/container lookup for src pod %q", t.SrcPod),
-				}
-				return
+					Err:  fmt.Errorf("no namespace lookup for pod pair (%s, %s)", t.SrcPod, t.DstPod),
+				})
+				continue
 			}
-			result.PingResults[idx] = RunPing(ctx, restConfig, ns, cn, t, opts.PingCount)
-		}(i, tests[i])
+			result.PingResults = append(result.PingResults, stage.run(t))
+		}
 	}
-	wg.Wait()
 
 	// Summary.
 	for _, r := range result.PingResults {
 		result.Summary.TotalTests++
-		switch {
-		case r.OK:
+		if r.OK {
 			result.Summary.Passed++
-		case r.Err != nil && r.PacketLoss < 0:
-			result.Summary.ExecErrors++
-		default:
+		} else {
 			result.Summary.Failed++
 		}
 	}
@@ -273,7 +289,7 @@ func RunMatrix(ctx context.Context, c client.Client, restConfig *rest.Config, ui
 
 // testPodWithDS pairs a Running+Ready pod with the DaemonSetRef that
 // owns it so the orchestrator can later find the right container to
-// `ping` from.
+// exec into.
 type testPodWithDS struct {
 	pod *corev1.Pod
 	ref DaemonSetRef
@@ -281,12 +297,14 @@ type testPodWithDS struct {
 
 // buildTestPod converts a corev1.Pod into a TestPod by parsing the
 // multus `network-status` annotation. The first IPv4 per rail is used;
-// IPv6 is currently ignored.
-func buildTestPod(p *corev1.Pod) (TestPod, error) {
+// IPv6 is currently ignored. Also returns the rail→multus interface
+// name mapping (e.g. "rail-0" → "net1") so the caller can hand it to
+// DiscoverRDMADevices to populate RDMADevsByRail for the RDMA stages.
+func buildTestPod(p *corev1.Pod) (TestPod, map[string]string, error) {
 	ann := p.Annotations[MultusAnnotation]
 	nets, err := ParseNetworkStatus(ann)
 	if err != nil {
-		return TestPod{}, err
+		return TestPod{}, nil, err
 	}
 	tp := TestPod{
 		Name:      p.Name,
@@ -294,6 +312,7 @@ func buildTestPod(p *corev1.Pod) (TestPod, error) {
 		Node:      p.Spec.NodeName,
 		IPsByRail: map[string]string{},
 	}
+	ifaceByRail := map[string]string{}
 	for _, n := range SecondaryNetworks(nets) {
 		ip := pickIPv4(n.IPs)
 		if ip == "" {
@@ -311,13 +330,16 @@ func buildTestPod(p *corev1.Pod) (TestPod, error) {
 		}
 		tp.IPsByRail[rail] = ip
 		tp.RailOrder = append(tp.RailOrder, rail)
+		if n.Interface != "" {
+			ifaceByRail[rail] = n.Interface
+		}
 	}
 	sort.Strings(tp.RailOrder)
-	return tp, nil
+	return tp, ifaceByRail, nil
 }
 
 // pickIPv4 returns the first IPv4 address from the multus IP list, or
-// "" if none. IPv6 entries are ignored (matrix uses ping, not ping6).
+// "" if none. IPv6 entries are ignored.
 func pickIPv4(ips []string) string {
 	for _, ip := range ips {
 		if strings.Count(ip, ".") == 3 && !strings.Contains(ip, ":") {
@@ -339,10 +361,10 @@ func railKey(name string) string {
 
 func emitSummary(uiOutput ui.Output, result *MatrixResult) {
 	s := result.Summary
-	uiOutput.Info("Matrix complete: %d/%d passed, %d failed, %d exec error(s)",
-		s.Passed, s.TotalTests, s.Failed, s.ExecErrors)
-	if s.Failed == 0 && s.ExecErrors == 0 && s.TotalTests > 0 {
-		uiOutput.Success("All %d ping test(s) passed", s.TotalTests)
+	uiOutput.Info("Matrix complete: %d/%d passed, %d failed",
+		s.Passed, s.TotalTests, s.Failed)
+	if s.Failed == 0 && s.TotalTests > 0 {
+		uiOutput.Success("All %d RDMA test(s) passed", s.TotalTests)
 		return
 	}
 	// Render up to 5 failures so operators see what broke without
@@ -362,15 +384,25 @@ func emitSummary(uiOutput ui.Output, result *MatrixResult) {
 		}
 		src := failureLabel(f.Test.SrcNode, f.Test.SrcPod)
 		dst := failureLabel(f.Test.DstNode, f.Test.DstPod)
-		switch {
-		case f.Err != nil:
-			uiOutput.Error("ping %s→%s (%s): %v", src, dst, f.Test.Rail, f.Err)
-		default:
-			uiOutput.Error("ping %s→%s (%s): %d%% packet loss", src, dst, f.Test.Rail, f.PacketLoss)
+		label := stageLabelOf(f.Test.Kind)
+		if f.Err != nil {
+			uiOutput.Error("%s %s→%s (%s): %v", label, src, dst, f.Test.Rail, f.Err)
+		} else {
+			uiOutput.Error("%s %s→%s (%s): test failed (no result row parsed or non-zero exit)",
+				label, src, dst, f.Test.Rail)
 		}
 	}
 }
 
+// stageLabelOf returns the human-friendly label for a test kind used
+// in failure-line prefixes ("rping", "ib_write_bw").
+func stageLabelOf(k PingTestKind) string {
+	if k.IsRDMABw() {
+		return "ib_write_bw"
+	}
+	return "rping"
+}
+
 // failureLabel mirrors axisLabel from text_report.go — duplicated here
 // so emitSummary doesn't have to expose the renderer's helper.
 func failureLabel(node, pod string) string {
diff --git a/pkg/networkoperatorplugin/connectivity/matrix.go b/pkg/networkoperatorplugin/connectivity/matrix.go
index 3c7840c..d522d47 100644
--- a/pkg/networkoperatorplugin/connectivity/matrix.go
+++ b/pkg/networkoperatorplugin/connectivity/matrix.go
@@ -34,6 +34,12 @@ type TestPod struct {
 	// typically have one v4 per attachment). Rails that didn't yield
 	// an IP are absent from the map.
 	IPsByRail map[string]string
+	// RDMADevsByRail is "<rail>" → RDMA device name (e.g.
+	// "mlx5_0") for the VF attached to that secondary network.
+	// Populated by DiscoverRDMADevices(); rails without an
+	// RDMA-capable device or where the lookup failed are absent.
+	// Used by RunIbWriteBw / RunRPing to pass `-d <dev>`.
+	RDMADevsByRail map[string]string
 	// RailOrder is the deterministic iteration order over IPsByRail
 	// — needed because Go map iteration is randomized and the cross
 	// -rail canary picks "rail 0" vs "rail 1" by position.
@@ -41,37 +47,70 @@ type TestPod struct {
 }
 
 // PingTest is one src→dst test the matrix will execute. Kind tags the
-// test bucket: same-rail tests verify the rail is reachable end-to-end;
-// cross-rail tests are a single canary per pod pair to spot
-// misconfigured routes that would otherwise be invisible if every rail
-// passed independently.
+// test bucket. Two RDMA test families are scheduled in stages by the
+// orchestrator:
+//
+//   - Kind = RDMAPingSameRail / RDMAPingCrossRail — `rping -c -a
+//     <ip>` against an `rping -s -a <ip>` server. Verifies the QP
+//     pair establishes end-to-end (RDMA-CM TCP handshake + RDMA
+//     write).
+//   - Kind = RDMABwSameRail / RDMABwCrossRail — `ib_write_bw -d
+//     <dev> -R --report_gbits [<server-ip>]` against an
+//     `ib_write_bw -d <dev> -R --report_gbits` server. Measures
+//     bandwidth in Gbps.
 //
 // SrcPod/DstPod carry the actual k8s pod names — needed by the
 // orchestrator to issue the SPDY exec against the right pod. SrcNode/
 // DstNode are the kubelet's `Spec.NodeName` for each endpoint and are
 // used everywhere the matrix is rendered for humans (operators
 // recognize hostnames; DaemonSet-generated pod suffixes are noise).
+// SrcRDMADev / DstRDMADev carry the per-pod RDMA device names so the
+// test runner can pass `-d <dev>` for ib_write_bw.
 type PingTest struct {
-	Kind    PingTestKind
-	SrcPod  string
-	DstPod  string
-	SrcNode string
-	DstNode string
-	Rail    string // for same-rail tests; "<srcRail>→<dstRail>" for cross
-	SrcIP   string
-	DstIP   string
-	SrcRail string
-	DstRail string
+	Kind       PingTestKind
+	SrcPod     string
+	DstPod     string
+	SrcNode    string
+	DstNode    string
+	Rail       string // for same-rail tests; "<srcRail>→<dstRail>" for cross
+	SrcIP      string
+	DstIP      string
+	SrcRail    string
+	DstRail    string
+	SrcRDMADev string
+	DstRDMADev string
 }
 
-// PingTestKind enumerates the buckets the matrix renders into.
+// PingTestKind enumerates the buckets the matrix renders into. Order
+// follows the orchestrator's staged-execution flow: rping first
+// (QP-establishment canary), ib_write_bw second (bandwidth).
 type PingTestKind int
 
 const (
-	PingSameRail PingTestKind = iota
-	PingCrossRail
+	RDMAPingSameRail PingTestKind = iota
+	RDMAPingCrossRail
+	RDMABwSameRail
+	RDMABwCrossRail
 )
 
+// IsRDMAPing reports whether the test kind is an rping (RDMA-CM QP)
+// test.
+func (k PingTestKind) IsRDMAPing() bool {
+	return k == RDMAPingSameRail || k == RDMAPingCrossRail
+}
+
+// IsRDMABw reports whether the test kind is an ib_write_bw bandwidth
+// test.
+func (k PingTestKind) IsRDMABw() bool {
+	return k == RDMABwSameRail || k == RDMABwCrossRail
+}
+
+// IsCrossRail reports whether the test kind is one of the cross-rail
+// canary variants (regardless of family).
+func (k PingTestKind) IsCrossRail() bool {
+	return k == RDMAPingCrossRail || k == RDMABwCrossRail
+}
+
 // MatrixSkip captures the soft-skip case where fewer than 2 schedulable
 // test pods were found — the matrix returns an empty TestSet plus this
 // reason for the operator to see.
@@ -82,10 +121,17 @@ type MatrixSkip struct {
 // MatrixPlan is the full set of tests the orchestrator will execute.
 // Generated by Plan() from a sorted list of TestPods so the order is
 // deterministic regardless of the cluster's pod-listing order.
+//
+// Pairs without RDMA device names on both ends (the discovery step
+// couldn't resolve the secondary network → mlx5_X mapping) are
+// silently dropped: there's no `-d <dev>` to pass and the test
+// would fail with a confusing error.
 type MatrixPlan struct {
-	SameRail   []PingTest
-	CrossRail  []PingTest
-	Skip       *MatrixSkip
+	RDMASameRail    []PingTest
+	RDMACrossRail   []PingTest
+	RDMABwSameRail  []PingTest
+	RDMABwCrossRail []PingTest
+	Skip            *MatrixSkip
 }
 
 // Plan builds the test plan from the given pods. The matrix generation
@@ -93,13 +139,13 @@ type MatrixPlan struct {
 // fixture pod lists.
 //
 //   - Same-rail: for every ordered pod pair (A, B) with A≠B, emit one
-//     test per rail both endpoints share. This is the data-plane
-//     verification proper.
-//   - Cross-rail canary: for every ordered pod pair (A, B), emit a
-//     single test pinging A.rail[0] → B.rail[1] when both pods have
-//     ≥2 rails. Catches routing misconfigurations that allow
-//     accidental rail-to-rail leakage (or, when blocked by design,
-//     confirms the isolation).
+//     rping + one ib_write_bw test per rail both endpoints share. This
+//     is the data-plane verification proper.
+//   - Cross-rail canary: for every ordered pod pair (A, B), emit one
+//     rping + one ib_write_bw test pinging A.rail[0] → B.rail[1] when
+//     both pods have ≥2 rails. Catches routing misconfigurations that
+//     allow accidental rail-to-rail leakage (or, when blocked by
+//     design, confirms the isolation).
 //   - Soft-skip on <2 pods: no tests are emitted and Skip is set so
 //     the caller can render "matrix skipped: only N schedulable test
 //     pod(s)".
@@ -122,25 +168,32 @@ func Plan(pods []TestPod) MatrixPlan {
 				continue
 			}
 			// Same-rail tests: rails the two endpoints both
-			// attach to.
+			// attach to, where RDMA devices are known on both
+			// ends.
 			for _, rail := range src.RailOrder {
 				srcIP, ok1 := src.IPsByRail[rail]
 				dstIP, ok2 := dst.IPsByRail[rail]
 				if !ok1 || !ok2 || srcIP == "" || dstIP == "" {
 					continue
 				}
-				plan.SameRail = append(plan.SameRail, PingTest{
-					Kind:    PingSameRail,
-					SrcPod:  src.Name,
-					DstPod:  dst.Name,
-					SrcNode: src.Node,
-					DstNode: dst.Node,
-					Rail:    rail,
-					SrcIP:   srcIP,
-					DstIP:   dstIP,
-					SrcRail: rail,
-					DstRail: rail,
-				})
+				srcDev, hasSrcDev := src.RDMADevsByRail[rail]
+				dstDev, hasDstDev := dst.RDMADevsByRail[rail]
+				if !hasSrcDev || !hasDstDev || srcDev == "" || dstDev == "" {
+					continue
+				}
+				base := PingTest{
+					SrcPod: src.Name, DstPod: dst.Name,
+					SrcNode: src.Node, DstNode: dst.Node,
+					Rail: rail, SrcIP: srcIP, DstIP: dstIP,
+					SrcRail: rail, DstRail: rail,
+					SrcRDMADev: srcDev, DstRDMADev: dstDev,
+				}
+				rpingT := base
+				rpingT.Kind = RDMAPingSameRail
+				plan.RDMASameRail = append(plan.RDMASameRail, rpingT)
+				bwT := base
+				bwT.Kind = RDMABwSameRail
+				plan.RDMABwSameRail = append(plan.RDMABwSameRail, bwT)
 			}
 			// Cross-rail canary: src.rail[0] → dst.rail[1] when
 			// both pods have ≥2 rails. Skipped otherwise.
@@ -148,20 +201,25 @@ func Plan(pods []TestPod) MatrixPlan {
 				srcRail, dstRail := src.RailOrder[0], dst.RailOrder[1]
 				srcIP, ok1 := src.IPsByRail[srcRail]
 				dstIP, ok2 := dst.IPsByRail[dstRail]
-				if ok1 && ok2 && srcIP != "" && dstIP != "" {
-					plan.CrossRail = append(plan.CrossRail, PingTest{
-						Kind:    PingCrossRail,
-						SrcPod:  src.Name,
-						DstPod:  dst.Name,
-						SrcNode: src.Node,
-						DstNode: dst.Node,
-						Rail:    fmt.Sprintf("%s→%s", srcRail, dstRail),
-						SrcIP:   srcIP,
-						DstIP:   dstIP,
-						SrcRail: srcRail,
-						DstRail: dstRail,
-					})
+				srcDev, hasSrcDev := src.RDMADevsByRail[srcRail]
+				dstDev, hasDstDev := dst.RDMADevsByRail[dstRail]
+				if !ok1 || !ok2 || srcIP == "" || dstIP == "" || !hasSrcDev || !hasDstDev {
+					continue
+				}
+				base := PingTest{
+					SrcPod: src.Name, DstPod: dst.Name,
+					SrcNode: src.Node, DstNode: dst.Node,
+					Rail:    fmt.Sprintf("%s→%s", srcRail, dstRail),
+					SrcIP:   srcIP, DstIP: dstIP,
+					SrcRail: srcRail, DstRail: dstRail,
+					SrcRDMADev: srcDev, DstRDMADev: dstDev,
 				}
+				rpingC := base
+				rpingC.Kind = RDMAPingCrossRail
+				plan.RDMACrossRail = append(plan.RDMACrossRail, rpingC)
+				bwC := base
+				bwC.Kind = RDMABwCrossRail
+				plan.RDMABwCrossRail = append(plan.RDMABwCrossRail, bwC)
 			}
 		}
 	}
diff --git a/pkg/networkoperatorplugin/connectivity/matrix_test.go b/pkg/networkoperatorplugin/connectivity/matrix_test.go
index af5366a..b4e1cc3 100644
--- a/pkg/networkoperatorplugin/connectivity/matrix_test.go
+++ b/pkg/networkoperatorplugin/connectivity/matrix_test.go
@@ -23,16 +23,20 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
+// mkPod builds a TestPod fixture with synthetic IPs *and* an RDMA
+// device per rail. Plan() now requires both sides of a pair to have an
+// RDMA device for the rail in order to emit tests (since the matrix
+// is RDMA-only), so the test fixture must populate RDMADevsByRail to
+// produce non-empty plans.
 func mkPod(name string, rails ...string) TestPod {
 	tp := TestPod{
-		Name:      name,
-		IPsByRail: map[string]string{},
+		Name:           name,
+		IPsByRail:      map[string]string{},
+		RDMADevsByRail: map[string]string{},
 	}
-	for i, rail := range rails {
-		// Synthesize an IP deterministic per (pod, rail) so failed
-		// assertions are readable: pod-A rail-0 → "10.0.<podIdx>.<railIdx>"
-		_ = i
+	for _, rail := range rails {
 		tp.IPsByRail[rail] = fakeIP(name, rail)
+		tp.RDMADevsByRail[rail] = fakeRDMADev(name, rail)
 		tp.RailOrder = append(tp.RailOrder, rail)
 	}
 	return tp
@@ -70,12 +74,19 @@ func fakeIP(pod, rail string) string {
 	return "0.0.0.0"
 }
 
+// fakeRDMADev returns a stable mlx5-style device name per (pod, rail).
+// The test only needs the strings to be non-empty for Plan() to emit
+// tests; the actual device names are never opened.
+func fakeRDMADev(pod, rail string) string {
+	return "mlx5_" + pod + "_" + rail
+}
+
 func TestPlan_SoftSkipOnFewerThan2Pods(t *testing.T) {
 	t.Run("zero pods", func(t *testing.T) {
 		plan := Plan(nil)
 		require.NotNil(t, plan.Skip)
 		assert.Contains(t, plan.Skip.Reason, "0 schedulable")
-		assert.Empty(t, plan.SameRail)
+		assert.Empty(t, plan.RDMASameRail)
 	})
 	t.Run("one pod", func(t *testing.T) {
 		plan := Plan([]TestPod{mkPod("pod-a", "rail-0", "rail-1")})
@@ -91,18 +102,28 @@ func TestPlan_TwoPodsOneRail(t *testing.T) {
 	})
 	require.Nil(t, plan.Skip)
 
-	// Same-rail: 2 pods × 1 ordered pair each direction × 1 rail = 2 tests.
-	assert.Len(t, plan.SameRail, 2)
+	// Same-rail: 2 pods × 1 ordered pair each direction × 1 rail = 2
+	// rping tests and 2 ib_write_bw tests.
+	assert.Len(t, plan.RDMASameRail, 2)
+	assert.Len(t, plan.RDMABwSameRail, 2)
 	// Cross-rail canary requires ≥2 rails per pod — skipped here.
-	assert.Empty(t, plan.CrossRail)
+	assert.Empty(t, plan.RDMACrossRail)
+	assert.Empty(t, plan.RDMABwCrossRail)
 
-	// Check direction coverage: both A→B and B→A appear.
+	// Check direction coverage: both A→B and B→A appear in the rping
+	// slice.
 	dirs := map[string]bool{}
-	for _, t := range plan.SameRail {
+	for _, t := range plan.RDMASameRail {
 		dirs[t.SrcPod+"→"+t.DstPod] = true
 	}
 	assert.True(t, dirs["pod-a→pod-b"])
 	assert.True(t, dirs["pod-b→pod-a"])
+	// Every emitted test carries the per-side RDMA device name so
+	// ib_write_bw can pass `-d <dev>`.
+	for _, tt := range plan.RDMABwSameRail {
+		assert.NotEmpty(t, tt.SrcRDMADev, "%+v", tt)
+		assert.NotEmpty(t, tt.DstRDMADev, "%+v", tt)
+	}
 }
 
 func TestPlan_TwoPodsTwoRails_IncludesCrossRailCanary(t *testing.T) {
@@ -112,15 +133,18 @@ func TestPlan_TwoPodsTwoRails_IncludesCrossRailCanary(t *testing.T) {
 	})
 	require.Nil(t, plan.Skip)
 
-	// Same-rail: 2 pods × 1 ordered pair each direction × 2 rails = 4.
-	assert.Len(t, plan.SameRail, 4)
-	// Cross-rail canary: one per ordered pod pair = 2.
-	assert.Len(t, plan.CrossRail, 2)
+	// Same-rail: 2 pods × 1 ordered pair each direction × 2 rails = 4
+	// per family.
+	assert.Len(t, plan.RDMASameRail, 4)
+	assert.Len(t, plan.RDMABwSameRail, 4)
+	// Cross-rail canary: one per ordered pod pair = 2 per family.
+	assert.Len(t, plan.RDMACrossRail, 2)
+	assert.Len(t, plan.RDMABwCrossRail, 2)
 
 	// Each cross-rail test should ping rail-0 → rail-1 (the first two
 	// rails by sorted order). Concrete IP assertions catch
 	// off-by-one in railOrder indexing.
-	for _, c := range plan.CrossRail {
+	for _, c := range plan.RDMACrossRail {
 		assert.Equal(t, "rail-0", c.SrcRail)
 		assert.Equal(t, "rail-1", c.DstRail)
 		assert.Equal(t, "rail-0→rail-1", c.Rail)
@@ -141,13 +165,15 @@ func TestPlan_ThreePodsThreeRails_FullMatrix(t *testing.T) {
 	//   - pod-a ↔ pod-b: 3 rails × 2 dirs = 6
 	//   - pod-a ↔ pod-c: 2 rails × 2 dirs = 4
 	//   - pod-b ↔ pod-c: 2 rails × 2 dirs = 4
-	// Total = 14.
-	assert.Len(t, plan.SameRail, 14)
+	// Total = 14 per family (rping + ib_write_bw each).
+	assert.Len(t, plan.RDMASameRail, 14)
+	assert.Len(t, plan.RDMABwSameRail, 14)
 
 	// Cross-rail canary requires ≥2 rails on both endpoints. All
 	// three pods qualify (pod-c has 2), so every ordered pair gets
-	// one canary: 3 × 2 = 6.
-	assert.Len(t, plan.CrossRail, 6)
+	// one canary: 3 × 2 = 6 per family.
+	assert.Len(t, plan.RDMACrossRail, 6)
+	assert.Len(t, plan.RDMABwCrossRail, 6)
 }
 
 func TestPlan_StableOrderingAcrossRuns(t *testing.T) {
@@ -158,10 +184,12 @@ func TestPlan_StableOrderingAcrossRuns(t *testing.T) {
 	}
 	plan1 := Plan(pods)
 	plan2 := Plan(pods)
-	require.Equal(t, plan1.SameRail, plan2.SameRail)
-	require.Equal(t, plan1.CrossRail, plan2.CrossRail)
+	require.Equal(t, plan1.RDMASameRail, plan2.RDMASameRail)
+	require.Equal(t, plan1.RDMACrossRail, plan2.RDMACrossRail)
+	require.Equal(t, plan1.RDMABwSameRail, plan2.RDMABwSameRail)
+	require.Equal(t, plan1.RDMABwCrossRail, plan2.RDMABwCrossRail)
 	// First test should always be from pod-a (sorted-by-name).
-	assert.Equal(t, "pod-a", plan1.SameRail[0].SrcPod)
+	assert.Equal(t, "pod-a", plan1.RDMASameRail[0].SrcPod)
 }
 
 func TestPlan_SkipsPairsWithoutSharedRail(t *testing.T) {
@@ -171,7 +199,21 @@ func TestPlan_SkipsPairsWithoutSharedRail(t *testing.T) {
 	})
 	require.Nil(t, plan.Skip)
 	// No shared rail → zero same-rail tests.
-	assert.Empty(t, plan.SameRail)
+	assert.Empty(t, plan.RDMASameRail)
+	assert.Empty(t, plan.RDMABwSameRail)
 	// Each pod only has 1 rail → no cross-rail canary either.
-	assert.Empty(t, plan.CrossRail)
+	assert.Empty(t, plan.RDMACrossRail)
+	assert.Empty(t, plan.RDMABwCrossRail)
+}
+
+func TestPlan_SkipsPairsWithoutRDMADevices(t *testing.T) {
+	// Pod with an IP but no RDMA device for the shared rail —
+	// Plan() drops those silently since RunIbWriteBw needs -d <dev>.
+	a := mkPod("pod-a", "rail-0")
+	b := mkPod("pod-b", "rail-0")
+	delete(b.RDMADevsByRail, "rail-0")
+	plan := Plan([]TestPod{a, b})
+	require.Nil(t, plan.Skip)
+	assert.Empty(t, plan.RDMASameRail, "no test should be emitted for the missing-device pair")
+	assert.Empty(t, plan.RDMABwSameRail)
 }
diff --git a/pkg/networkoperatorplugin/connectivity/ping.go b/pkg/networkoperatorplugin/connectivity/ping.go
deleted file mode 100644
index b0baa51..0000000
--- a/pkg/networkoperatorplugin/connectivity/ping.go
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright 2026 NVIDIA CORPORATION & AFFILIATES
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-package connectivity
-
-import (
-	"context"
-	"fmt"
-	"regexp"
-	"strconv"
-
-	"github.com/nvidia/k8s-launch-kit/pkg/kubeclient"
-	"k8s.io/client-go/rest"
-)
-
-// PingResult carries the outcome of one src→dst ping test. PacketLoss
-// (0-100) is parsed from `ping`'s summary line; -1 means the line
-// couldn't be parsed (treated as fail). RTT fields are best-effort —
-// `ping` omits the rtt summary when every packet is lost.
-type PingResult struct {
-	Test       PingTest
-	OK         bool
-	PacketLoss int     // percent, 0-100; -1 when not parseable
-	RTTAvgMs   float64 // 0 when unknown
-	Stdout     string
-	Stderr     string
-	Err        error
-}
-
-// pingSummaryRe matches the trailing summary line of `ping -c N` output,
-// e.g.:
-//
-//	3 packets transmitted, 3 received, 0% packet loss, time 2003ms
-//	5 packets transmitted, 0 received, 100% packet loss, time 4099ms
-//
-// Both BusyBox ping and iputils ping share this format; the
-// `mellanox/rping-test` image ships iputils.
-var pingSummaryRe = regexp.MustCompile(`(\d+)\s+packets transmitted,\s*(\d+)\s+(?:packets\s+)?received,\s*(\d+)%\s+packet loss`)
-
-// pingRTTRe matches the optional rtt summary line, e.g.:
-//
-//	rtt min/avg/max/mdev = 0.123/0.456/0.789/0.100 ms
-var pingRTTRe = regexp.MustCompile(`rtt min/avg/max/mdev = [\d.]+/([\d.]+)/[\d.]+/[\d.]+ ms`)
-
-// RunPing execs `ping -c <count> -W 1 -I <srcIP> <dstIP>` inside the
-// source pod and parses the summary line. `-I` is critical for the
-// rail-based matrix — without it, ping would use the pod's default-CNI
-// interface and never traverse the secondary network we're testing.
-//
-// Returned PingResult is always populated; PingResult.Err carries any
-// exec-level failure (RBAC, pod gone, kubelet timeout). PingResult.OK
-// is true only when the exec succeeded *and* packet loss < 100%.
-func RunPing(ctx context.Context, restConfig *rest.Config, namespace, container string, test PingTest, count int) PingResult {
-	if count <= 0 {
-		count = 3
-	}
-	cmd := []string{
-		"ping",
-		"-c", strconv.Itoa(count),
-		"-W", "1",
-		"-I", test.SrcIP,
-		test.DstIP,
-	}
-	res, err := kubeclient.ExecInPod(ctx, restConfig, namespace, test.SrcPod, container, cmd)
-
-	r := PingResult{
-		Test:       test,
-		PacketLoss: -1,
-		Stdout:     res.Stdout,
-		Stderr:     res.Stderr,
-		Err:        err,
-	}
-
-	// Even on exec error we try to parse stdout — ping returns
-	// non-zero when packet loss is 100%, and the SPDY executor
-	// surfaces that as an error with stdout still populated.
-	if m := pingSummaryRe.FindStringSubmatch(res.Stdout); m != nil {
-		if loss, perr := strconv.Atoi(m[3]); perr == nil {
-			r.PacketLoss = loss
-		}
-	}
-	if m := pingRTTRe.FindStringSubmatch(res.Stdout); m != nil {
-		if avg, perr := strconv.ParseFloat(m[1], 64); perr == nil {
-			r.RTTAvgMs = avg
-		}
-	}
-
-	switch {
-	case err != nil && r.PacketLoss < 0:
-		// Couldn't even parse output — treat as fail and keep the
-		// exec error for the report.
-		r.OK = false
-	case r.PacketLoss < 0:
-		// Stdout present but no summary line — unusual; surface
-		// it.
-		r.OK = false
-		r.Err = fmt.Errorf("ping output missing summary line")
-	default:
-		r.OK = r.PacketLoss < 100
-	}
-	return r
-}
diff --git a/pkg/networkoperatorplugin/connectivity/rdma.go b/pkg/networkoperatorplugin/connectivity/rdma.go
new file mode 100644
index 0000000..2e511e9
--- /dev/null
+++ b/pkg/networkoperatorplugin/connectivity/rdma.go
@@ -0,0 +1,261 @@
+// Copyright 2026 NVIDIA CORPORATION & AFFILIATES
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package connectivity
+
+import (
+	"context"
+	"fmt"
+	"regexp"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/nvidia/k8s-launch-kit/pkg/kubeclient"
+	"k8s.io/client-go/rest"
+)
+
+// rdmaServerSettleDelay is the pause between starting the server
+// process and connecting from the client. ib_write_bw and rping both
+// open a TCP listener on the server side before accepting QP connect
+// requests; without a small settle window the client races the
+// listener and reports "Couldn't connect to server".
+const rdmaServerSettleDelay = 2 * time.Second
+
+// rdmaTestTimeout caps any single RDMA test (server start + client
+// run + cleanup). Generous enough to survive a sluggish first
+// connection; the orchestrator caps the whole matrix separately via
+// Options.Timeout.
+const rdmaTestTimeout = 90 * time.Second
+
+// DiscoverRDMADevices runs once per test pod to map each multus
+// secondary-network interface to its RDMA device. Reads
+// /sys/class/net/<iface>/device/infiniband/ inside the pod — that
+// directory exists for any mlx5 VF and contains exactly one entry
+// (the device name, e.g. "mlx5_4").
+//
+// nets is the parsed multus annotation; ifaceByRail tells the
+// mapping which net interface (e.g. "net1") backs each rail.
+// Returns map[rail]→<rdmaDev>; rails whose interface couldn't be
+// resolved are absent from the map.
+func DiscoverRDMADevices(ctx context.Context, restConfig *rest.Config, namespace, pod, container string, ifaceByRail map[string]string) map[string]string {
+	out := map[string]string{}
+	if len(ifaceByRail) == 0 {
+		return out
+	}
+	// One shell exec per pod: build a tiny script that prints
+	// `<rail>=<dev>` per resolved interface. Cheaper than one
+	// exec per rail.
+	var b strings.Builder
+	for rail, iface := range ifaceByRail {
+		if iface == "" {
+			continue
+		}
+		// `ls -1` prints one filename per line. Capture the
+		// first line — the mlx5 VF directory always contains
+		// exactly one entry, but `head -n 1` is a cheap safety
+		// belt against future kernel changes.
+		fmt.Fprintf(&b, "dev=$(ls -1 /sys/class/net/%s/device/infiniband/ 2>/dev/null | head -n 1); ", iface)
+		fmt.Fprintf(&b, "if [ -n \"$dev\" ]; then echo %q=$dev; fi; ", rail)
+	}
+	if b.Len() == 0 {
+		return out
+	}
+	res, err := kubeclient.ExecInPod(ctx, restConfig, namespace, pod, container, []string{"/bin/sh", "-c", b.String()})
+	if err != nil {
+		return out
+	}
+	for _, line := range strings.Split(res.Stdout, "\n") {
+		line = strings.TrimSpace(line)
+		if line == "" {
+			continue
+		}
+		eq := strings.IndexByte(line, '=')
+		if eq < 0 {
+			continue
+		}
+		rail := strings.Trim(strings.TrimSpace(line[:eq]), `"`)
+		dev := strings.TrimSpace(line[eq+1:])
+		if rail != "" && dev != "" {
+			out[rail] = dev
+		}
+	}
+	return out
+}
+
+// RunRPing executes the rping server/client dance for one test pair.
+// Spawn-fresh-server-per-test (the agreed lifecycle): start the
+// server as a background process via `nohup rping -s &`, sleep
+// rdmaServerSettleDelay, run the client, then `pkill rping` on the
+// server pod. Cleanup runs even if the client errored.
+//
+// rping returns 0 on success after C iterations. We don't read its
+// output for any structured value — the binary OK is enough for
+// matrix display. rping verifies the QP pair establishes
+// end-to-end; ib_write_bw additionally measures throughput.
+func RunRPing(ctx context.Context, restConfig *rest.Config, namespace string, serverPod, serverContainer string, clientPod, clientContainer string, test PingTest, iterations int) PingResult {
+	if iterations <= 0 {
+		iterations = 5
+	}
+	r := PingResult{Test: test}
+
+	tctx, cancel := context.WithTimeout(ctx, rdmaTestTimeout)
+	defer cancel()
+
+	serverCmd := fmt.Sprintf("nohup rping -s -a %s -p 9999 -v >/tmp/rping-server.log 2>&1 & echo $!", test.DstIP)
+	srvRes, err := kubeclient.ExecInPod(tctx, restConfig, namespace, serverPod, serverContainer, []string{"/bin/sh", "-c", serverCmd})
+	if err != nil {
+		r.Err = fmt.Errorf("rping server start: %w (stderr: %s)", err, srvRes.Stderr)
+		return r
+	}
+	serverPID := strings.TrimSpace(srvRes.Stdout)
+	defer killRDMAServer(restConfig, namespace, serverPod, serverContainer, "rping", serverPID)
+
+	// Settle window — give the server time to bind.
+	select {
+	case <-tctx.Done():
+		r.Err = fmt.Errorf("rping settle wait: %w", tctx.Err())
+		return r
+	case <-time.After(rdmaServerSettleDelay):
+	}
+
+	clientCmd := fmt.Sprintf("rping -c -a %s -p 9999 -C %d -v", test.DstIP, iterations)
+	cliRes, cliErr := kubeclient.ExecInPod(tctx, restConfig, namespace, clientPod, clientContainer, []string{"/bin/sh", "-c", clientCmd})
+	r.Stdout, r.Stderr = cliRes.Stdout, cliRes.Stderr
+	r.Err = cliErr
+	// rping exits 0 only on a clean run; non-zero exit propagates
+	// to cliErr.
+	r.OK = cliErr == nil
+	return r
+}
+
+// RunIbWriteBw runs an ib_write_bw bandwidth test for one src→dst pair.
+// Uses `-R` (RDMA-CM connection management — same handshake the
+// example DS expects) and `-s 65536 --report_gbits` to produce a
+// single-line, single-message-size output that's straightforward to
+// parse for a matrix cell. Each test allocates a fresh TCP listener
+// port (default 18515 + an orchestrator-supplied offset) to avoid
+// collisions with concurrent runs against unrelated rails.
+func RunIbWriteBw(ctx context.Context, restConfig *rest.Config, namespace string, serverPod, serverContainer string, clientPod, clientContainer string, test PingTest, port int) PingResult {
+	if port <= 0 {
+		port = 18515
+	}
+	r := PingResult{Test: test}
+	if test.SrcRDMADev == "" || test.DstRDMADev == "" {
+		r.Err = fmt.Errorf("ib_write_bw needs RDMA device names; got src=%q dst=%q", test.SrcRDMADev, test.DstRDMADev)
+		return r
+	}
+
+	tctx, cancel := context.WithTimeout(ctx, rdmaTestTimeout)
+	defer cancel()
+
+	// `ib_write_bw -d <dev> -R -s 65536 --report_gbits -p <port>`
+	// runs as the server when invoked without a peer IP, as the
+	// client when invoked with one. The server prints a banner and
+	// then waits; the client connects, runs the test, and both
+	// sides print the summary table.
+	serverCmd := fmt.Sprintf("nohup ib_write_bw -d %s -R -s 65536 --report_gbits -p %d >/tmp/ibwritebw-server.log 2>&1 & echo $!",
+		test.DstRDMADev, port)
+	srvRes, err := kubeclient.ExecInPod(tctx, restConfig, namespace, serverPod, serverContainer, []string{"/bin/sh", "-c", serverCmd})
+	if err != nil {
+		r.Err = fmt.Errorf("ib_write_bw server start: %w (stderr: %s)", err, srvRes.Stderr)
+		return r
+	}
+	serverPID := strings.TrimSpace(srvRes.Stdout)
+	defer killRDMAServer(restConfig, namespace, serverPod, serverContainer, "ib_write_bw", serverPID)
+
+	select {
+	case <-tctx.Done():
+		r.Err = fmt.Errorf("ib_write_bw settle wait: %w", tctx.Err())
+		return r
+	case <-time.After(rdmaServerSettleDelay):
+	}
+
+	clientCmd := fmt.Sprintf("ib_write_bw -d %s -R -s 65536 --report_gbits -p %d %s",
+		test.SrcRDMADev, port, test.DstIP)
+	cliRes, cliErr := kubeclient.ExecInPod(tctx, restConfig, namespace, clientPod, clientContainer, []string{"/bin/sh", "-c", clientCmd})
+	r.Stdout, r.Stderr = cliRes.Stdout, cliRes.Stderr
+
+	bw, msgRate, parseOK := parseIbWriteBwOutput(cliRes.Stdout)
+	r.BandwidthGbps = bw
+	r.MsgRateMpps = msgRate
+	switch {
+	case cliErr != nil && !parseOK:
+		r.Err = cliErr
+		r.OK = false
+	case !parseOK:
+		r.Err = fmt.Errorf("ib_write_bw output missing summary row")
+		r.OK = false
+	default:
+		// Treat any non-zero bandwidth as success — partial-bw
+		// numbers are still informative (slow rail, MTU
+		// mismatch, etc.) and the cell value tells the story.
+		r.OK = bw > 0
+	}
+	return r
+}
+
+// killRDMAServer is best-effort cleanup. We try the captured PID
+// first (precise) and fall back to a wildcard `pkill -f <prog>` for
+// the cases where the PID didn't survive the exec round-trip.
+// Errors are swallowed because by this point we've already recorded
+// the client's result — leaking a stale server is preferable to
+// failing the test on a cleanup hiccup.
+func killRDMAServer(restConfig *rest.Config, namespace, pod, container, prog, pid string) {
+	// Use a fresh background context so the cleanup runs even if
+	// the test's deadline already fired.
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+	cmd := fmt.Sprintf("kill %s 2>/dev/null; pkill -f %q 2>/dev/null; true", pid, prog)
+	_, _ = kubeclient.ExecInPod(ctx, restConfig, namespace, pod, container, []string{"/bin/sh", "-c", cmd})
+}
+
+// ibWriteBwSummaryRe matches the single-row summary line that
+// `ib_write_bw -s 65536 --report_gbits` emits on both client and
+// server. Format (whitespace-separated columns, varies slightly
+// across perftest versions; this regex matches the common shape):
+//
+//	#bytes     #iterations  BW peak[Gb/sec]  BW average[Gb/sec]  MsgRate[Mpps]
+//	65536      5000          194.39           193.21               0.368
+//
+// The leading message-size column is always integer; subsequent
+// columns are float with optional decimals. We don't anchor the
+// regex to start-of-line because perftest sometimes prefixes the
+// summary with " ".
+var ibWriteBwSummaryRe = regexp.MustCompile(`^\s*\d+\s+\d+\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*$`)
+
+// parseIbWriteBwOutput scans the client's stdout for the summary
+// row. Returns the peak Gbps, message rate (Mpps), and ok=true when a
+// summary row was found. We return the *peak* rather than average
+// because the average can be dragged down by ramp-up cycles on the
+// first few iterations; for a "is this rail healthy" matrix the
+// peak is the more useful number.
+func parseIbWriteBwOutput(stdout string) (peakGbps, msgRateMpps float64, ok bool) {
+	for _, line := range strings.Split(stdout, "\n") {
+		m := ibWriteBwSummaryRe.FindStringSubmatch(line)
+		if m == nil {
+			continue
+		}
+		peak, err1 := strconv.ParseFloat(m[1], 64)
+		_, err2 := strconv.ParseFloat(m[2], 64) // average — read for the side effect of validating the regex; not surfaced
+		rate, err3 := strconv.ParseFloat(m[3], 64)
+		if err1 != nil || err2 != nil || err3 != nil {
+			continue
+		}
+		return peak, rate, true
+	}
+	return 0, 0, false
+}
diff --git a/pkg/networkoperatorplugin/connectivity/rdma_test.go b/pkg/networkoperatorplugin/connectivity/rdma_test.go
new file mode 100644
index 0000000..0da5ce0
--- /dev/null
+++ b/pkg/networkoperatorplugin/connectivity/rdma_test.go
@@ -0,0 +1,111 @@
+// Copyright 2026 NVIDIA CORPORATION & AFFILIATES
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package connectivity
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// realisticIbWriteBwOutput is a captured sample of perftest's
+// `ib_write_bw -d <dev> -R -s 65536 --report_gbits <peer>` client
+// output. Includes the banner, the column header, the summary row,
+// and the trailing banner so parseIbWriteBwOutput's regex anchoring
+// is exercised against realistic surrounding context.
+const realisticIbWriteBwOutput = `---------------------------------------------------------------------------------------
+                    RDMA_Write BW Test
+ Dual-port       : OFF		Device         : mlx5_4
+ Number of qps   : 1		Transport type : IB
+ Connection type : RC		Using SRQ      : OFF
+ PCIe relax order: ON
+ ibv_wr* API     : ON
+ TX depth        : 128
+ CQ Moderation   : 100
+ Mtu             : 1024[B]
+ Link type       : Ethernet
+ GID index       : 3
+ Max inline data : 0[B]
+ rdma_cm QPs	 : ON
+ Data ex. method : rdma_cm
+---------------------------------------------------------------------------------------
+ local address: LID 0000 QPN 0x0094 PSN 0x4d2cb8
+ GID: 00:00:00:00:00:00:00:00:00:00:255:255:10:00:00:01
+ remote address: LID 0000 QPN 0x00a4 PSN 0xabcdef
+ GID: 00:00:00:00:00:00:00:00:00:00:255:255:10:00:00:02
+---------------------------------------------------------------------------------------
+ #bytes     #iterations    BW peak[Gb/sec]    BW average[Gb/sec]   MsgRate[Mpps]
+ 65536      5000             194.39             193.21		   0.368459
+---------------------------------------------------------------------------------------
+`
+
+func TestParseIbWriteBwOutput(t *testing.T) {
+	t.Run("realistic summary row parses", func(t *testing.T) {
+		peak, rate, ok := parseIbWriteBwOutput(realisticIbWriteBwOutput)
+		require.True(t, ok)
+		assert.InDelta(t, 194.39, peak, 0.001)
+		assert.InDelta(t, 0.368459, rate, 0.0001)
+	})
+
+	t.Run("output without summary returns ok=false", func(t *testing.T) {
+		// Output the server side prints before the client
+		// finishes — no summary row, just the banner.
+		short := `------------------------------------------------------
+                    RDMA_Write BW Test
+ Device         : mlx5_4
+------------------------------------------------------
+`
+		peak, rate, ok := parseIbWriteBwOutput(short)
+		assert.False(t, ok)
+		assert.Equal(t, 0.0, peak)
+		assert.Equal(t, 0.0, rate)
+	})
+
+	t.Run("malformed summary line is ignored", func(t *testing.T) {
+		bad := ` 65536      5000    not_a_number    193.21    0.368
+`
+		_, _, ok := parseIbWriteBwOutput(bad)
+		assert.False(t, ok)
+	})
+
+	t.Run("smaller message size sample", func(t *testing.T) {
+		// perftest sometimes emits sub-1Gbps numbers — the
+		// regex must accept them.
+		sample := ` 2          1000           0.05               0.04                  3.125
+`
+		peak, rate, ok := parseIbWriteBwOutput(sample)
+		require.True(t, ok)
+		assert.InDelta(t, 0.05, peak, 0.001)
+		assert.InDelta(t, 3.125, rate, 0.001)
+	})
+}
+
+func TestPingTestKind_Predicates(t *testing.T) {
+	assert.True(t, RDMAPingSameRail.IsRDMAPing())
+	assert.True(t, RDMAPingCrossRail.IsRDMAPing())
+	assert.False(t, RDMABwSameRail.IsRDMAPing())
+
+	assert.True(t, RDMABwSameRail.IsRDMABw())
+	assert.True(t, RDMABwCrossRail.IsRDMABw())
+	assert.False(t, RDMAPingSameRail.IsRDMABw())
+
+	assert.True(t, RDMAPingCrossRail.IsCrossRail())
+	assert.True(t, RDMABwCrossRail.IsCrossRail())
+	assert.False(t, RDMAPingSameRail.IsCrossRail())
+	assert.False(t, RDMABwSameRail.IsCrossRail())
+}
diff --git a/pkg/networkoperatorplugin/connectivity/report.go b/pkg/networkoperatorplugin/connectivity/report.go
index 20e2164..e14ee1a 100644
--- a/pkg/networkoperatorplugin/connectivity/report.go
+++ b/pkg/networkoperatorplugin/connectivity/report.go
@@ -26,6 +26,7 @@ import (
 
 	"github.com/nvidia/k8s-launch-kit/pkg/networkoperatorplugin"
 	"github.com/nvidia/k8s-launch-kit/pkg/networkoperatorplugin/crstate"
+	"github.com/nvidia/k8s-launch-kit/pkg/presetmatch"
 )
 
 // reportTemplate is the embedded HTML body the validate CLI's
@@ -38,10 +39,33 @@ import (
 //go:embed report.html.tmpl
 var reportTemplate string
 
+// OverallVerdict captures the high-level pass/fail outcome rendered
+// as a prominent banner at the top of the report. PASS means every
+// gating check succeeded (Helm release version, component versions,
+// manifest state, connectivity matrix). Preset deviations are
+// informational and do NOT downgrade PASS — they're surfaced
+// separately in the Node groups section.
+type OverallVerdict struct {
+	// Pass is true when every gating check succeeded.
+	Pass bool
+	// Reasons lists each individual gating check that failed (one
+	// short line per failure). Empty when Pass.
+	Reasons []string
+	// Notes lists informational items that don't gate (preset
+	// deviations, in-progress manifests when --wait wasn't used,
+	// matrix soft-skip). Surfaced in the banner subtitle when
+	// non-empty.
+	Notes []string
+}
+
 // ReportData is the input to RenderHTML. The validate CLI populates
 // it from values it has already computed (Manifests, Matrix) plus a
 // few small lookups (cluster API version, node labels).
 type ReportData struct {
+	// Verdict is the overall pass/fail outcome rendered as a
+	// prominent banner at the top of the report. Computed by the
+	// caller (CLI) from the same inputs that drive the exit code.
+	Verdict OverallVerdict
 	Cluster        ClusterInfo
 	Profile        ProfileInfo
 	NodeGroups     []NodeGroupInfo
@@ -52,8 +76,12 @@ type ReportData struct {
 	// sub-table under "Network Operator release" in the HTML
 	// report. Nil when the check couldn't run.
 	ComponentCheck *networkoperatorplugin.ComponentVersionCheck
-	Manifests      []networkoperatorplugin.ValidationResult
-	Matrix         *MatrixResult
+	// PresetMatches carries one Result per cluster group from
+	// pkg/presetmatch — surfaced under the "Node groups" section.
+	// Empty when validate ran without a usable cluster-config.yaml.
+	PresetMatches []presetmatch.Result
+	Manifests     []networkoperatorplugin.ValidationResult
+	Matrix        *MatrixResult
 	// Warnings is a flat list of one-line strings rendered as a
 	// bulleted rollup at the bottom of the report — typically the
 	// "in-progress manifest re-run later" notes and the matrix
@@ -79,13 +107,33 @@ type NodeGroupInfo struct {
 	IbCapable        bool
 	PresetApplied    bool
 	PresetDeviations []PresetDeviation
-	EastWestPFs      []PFInfo
-	NorthSouthPFs    []PFInfo
+	// EastWestPFs / NorthSouthPFs are the *actual* (discovered)
+	// PFs on the cluster. ExpectedEastWestPFs / ExpectedNorthSouthPFs
+	// mirror the layout but carry the certified topology's PFs from
+	// the matched preset (nil when no preset matched). When both are
+	// populated the report renders them as paired Actual / Expected
+	// sub-tables with mismatched rows highlighted in each.
+	EastWestPFs           []PFInfo
+	NorthSouthPFs         []PFInfo
+	ExpectedEastWestPFs   []PFInfo
+	ExpectedNorthSouthPFs []PFInfo
+	// PFCountMismatch is non-nil when the discovered PF count
+	// differs from the certified topology. Surfaced inline in the
+	// East-west PFs Actual header.
+	PFCountMismatch *PFCountMismatch
 }
 
 // PFInfo is one row in a node group's PF table. The fields mirror
 // PFConfig but as strings so the template doesn't need helpers for
 // "—" fallbacks on nil pointers.
+//
+// Mismatched is true when this row diverges from its counterpart in
+// the other table (the Actual table flags PCIs whose deviceID drifts
+// from the certified topology or PCIs the certified topology
+// doesn't list; the Expected table flags PCIs whose deviceID drifts
+// or PCIs the cluster doesn't actually have). Rendered as a tinted
+// row in both tables — the operator scans down and the diff is
+// obvious without inline annotations.
 type PFInfo struct {
 	PciAddress       string
 	DeviceID         string
@@ -97,7 +145,16 @@ type PFInfo struct {
 	PartNumber       string
 	NumaNode         string
 	ConnectedGPU     string
-	GPUProximity    string
+	GPUProximity     string
+	Mismatched       bool
+}
+
+// PFCountMismatch is set on NodeGroupInfo when the discovered PF
+// count differs from the certified topology — rendered next to the
+// "East-west PFs (N)" header rather than as a separate row.
+type PFCountMismatch struct {
+	Expected int
+	Got      int
 }
 
 // PresetDeviation is one row in a node group's deviation list —
@@ -152,7 +209,7 @@ type NodeInfo struct {
 // parse / execute failure; the writer is not flushed (callers
 // typically pass an *os.File which the OS will flush on Close).
 func RenderHTML(w io.Writer, data ReportData) error {
-	tmpl, err := template.New("verify-report").
+	tmpl, err := template.New("validation-report").
 		Funcs(reportFuncMap()).
 		Parse(reportTemplate)
 	if err != nil {
@@ -184,6 +241,37 @@ func reportFuncMap() template.FuncMap {
 			}
 			return "state-unknown"
 		},
+		// presetStatusClass maps presetmatch.Status to the same
+		// state-color CSS classes manifest rows use, so the visual
+		// language is consistent across the report.
+		"presetStatusClass": func(s presetmatch.Status) string {
+			switch s {
+			case presetmatch.StatusMatch:
+				return "state-success"
+			case presetmatch.StatusDeviation:
+				return "state-inprogress"
+			case presetmatch.StatusNotFound:
+				return "state-missing"
+			case presetmatch.StatusSkipped:
+				return "state-missing"
+			}
+			return "state-unknown"
+		},
+		// presetStatusLabel renders the human-facing label for the
+		// per-group platform-topology row's status badge.
+		"presetStatusLabel": func(s presetmatch.Status) string {
+			switch s {
+			case presetmatch.StatusMatch:
+				return "MATCH"
+			case presetmatch.StatusDeviation:
+				return "MISMATCH"
+			case presetmatch.StatusNotFound:
+				return "NOT CERTIFIED"
+			case presetmatch.StatusSkipped:
+				return "SKIPPED"
+			}
+			return "UNKNOWN"
+		},
 		// stateLabel renders the human-facing state name.
 		"stateLabel": func(s crstate.CRState) string {
 			switch s {
@@ -213,48 +301,86 @@ func reportFuncMap() template.FuncMap {
 			sort.Strings(out)
 			return out
 		},
-		// matrixByRail groups same-rail PingResults by rail name so
-		// the template can render one sub-table per rail.
+		// matrixByRail groups same-rail PingResults by (rail, kind
+		// family) so the template can render one sub-table per
+		// (rail, family) bucket. Families are RDMA-ping and
+		// RDMA-bandwidth — see PingTestKind.IsRDMAPing /
+		// IsRDMABw.
 		"matrixByRail": func(results []PingResult) []railSection {
-			byRail := map[string][]PingResult{}
-			rails := []string{}
+			type key struct {
+				rail string
+				fam  string
+			}
+			byKey := map[key][]PingResult{}
+			order := []key{}
 			for _, r := range results {
-				if r.Test.Kind != PingSameRail {
+				if r.Test.Kind.IsCrossRail() {
 					continue
 				}
-				if _, seen := byRail[r.Test.Rail]; !seen {
-					rails = append(rails, r.Test.Rail)
+				k := key{r.Test.Rail, htmlFamilyOf(r.Test.Kind)}
+				if _, seen := byKey[k]; !seen {
+					order = append(order, k)
 				}
-				byRail[r.Test.Rail] = append(byRail[r.Test.Rail], r)
+				byKey[k] = append(byKey[k], r)
 			}
-			sort.Strings(rails)
-			out := make([]railSection, 0, len(rails))
-			for _, rail := range rails {
-				rs := railSection{Rail: rail}
-				rs.Nodes, rs.Table = buildRailTable(byRail[rail])
+			// Sort deterministically: rail name, then a fixed
+			// family order so rping → ib_write_bw appear in
+			// execution order in the rendered report.
+			sort.Slice(order, func(i, j int) bool {
+				if order[i].rail != order[j].rail {
+					return order[i].rail < order[j].rail
+				}
+				return htmlFamilyRank(order[i].fam) < htmlFamilyRank(order[j].fam)
+			})
+			out := make([]railSection, 0, len(order))
+			for _, k := range order {
+				rs := railSection{Rail: k.rail, Family: k.fam}
+				rs.Nodes, rs.Table = buildRailTable(byKey[k])
 				out = append(out, rs)
 			}
 			return out
 		},
 		// matrixCrossRail filters down to the cross-rail canaries
-		// and sorts them deterministically.
-		"matrixCrossRail": func(results []PingResult) []PingResult {
-			cross := make([]PingResult, 0)
+		// and groups them by family. Sorted by family rank, then
+		// src/dst node within each family.
+		"matrixCrossRail": func(results []PingResult) []crossRailSection {
+			byFam := map[string][]PingResult{}
+			fams := []string{}
 			for _, r := range results {
-				if r.Test.Kind == PingCrossRail {
-					cross = append(cross, r)
+				if !r.Test.Kind.IsCrossRail() {
+					continue
 				}
-			}
-			sort.Slice(cross, func(i, j int) bool {
-				if cross[i].Test.SrcNode != cross[j].Test.SrcNode {
-					return cross[i].Test.SrcNode < cross[j].Test.SrcNode
+				fam := htmlFamilyOf(r.Test.Kind)
+				if _, seen := byFam[fam]; !seen {
+					fams = append(fams, fam)
 				}
-				return cross[i].Test.DstNode < cross[j].Test.DstNode
-			})
-			return cross
+				byFam[fam] = append(byFam[fam], r)
+			}
+			sort.Slice(fams, func(i, j int) bool { return htmlFamilyRank(fams[i]) < htmlFamilyRank(fams[j]) })
+			out := make([]crossRailSection, 0, len(fams))
+			for _, fam := range fams {
+				rows := byFam[fam]
+				sort.Slice(rows, func(i, j int) bool {
+					if rows[i].Test.SrcNode != rows[j].Test.SrcNode {
+						return rows[i].Test.SrcNode < rows[j].Test.SrcNode
+					}
+					return rows[i].Test.DstNode < rows[j].Test.DstNode
+				})
+				out = append(out, crossRailSection{Family: fam, Results: rows})
+			}
+			return out
+		},
+		// familyTitle is the human-friendly section header for a
+		// given kind family.
+		"familyTitle": func(fam string) string {
+			if fam == "ibbw" {
+				return "RDMA bandwidth (ib_write_bw)"
+			}
+			return "RDMA ping (rping)"
 		},
 		// cellClass returns the CSS class for a matrix cell based
-		// on the ping outcome.
+		// on the result outcome (family-agnostic — color depends
+		// only on pass/fail/missing).
 		"cellClass": func(r *PingResult) string {
 			if r == nil {
 				return "cell-missing"
@@ -264,21 +390,22 @@ func reportFuncMap() template.FuncMap {
 			}
 			return "cell-fail"
 		},
-		// cellText renders the cell's body (loss%/rtt or "ERR").
-		"cellText": func(r *PingResult) string {
+		// cellText renders the cell's body. Family-specific
+		// formatting: rping shows ✓/✗, ib_write_bw shows ✓ N Gbps.
+		"cellText": func(r *PingResult, fam string) string {
 			if r == nil {
 				return "·"
 			}
-			if r.OK {
-				if r.RTTAvgMs > 0 {
-					return fmt.Sprintf("✓ %d%% %.2fms", r.PacketLoss, r.RTTAvgMs)
+			if fam == "ibbw" {
+				if r.OK && r.BandwidthGbps > 0 {
+					return fmt.Sprintf("✓ %.1f Gbps", r.BandwidthGbps)
 				}
-				return fmt.Sprintf("✓ %d%%", r.PacketLoss)
+				return "✗ ERR"
 			}
-			if r.PacketLoss >= 0 {
-				return fmt.Sprintf("✗ %d%%", r.PacketLoss)
+			if r.OK {
+				return "✓"
 			}
-			return "✗ ERR"
+			return "✗"
 		},
 		// nodeLabel mirrors the text renderer's axisLabel: prefer
 		// SrcNode/DstNode, fall back to the pod name when the
@@ -298,13 +425,42 @@ func reportFuncMap() template.FuncMap {
 	}
 }
 
-// railSection feeds the per-rail sub-table in the report. Nodes is
-// the deterministic axis ordering; Table is a flat node→node→*result
-// map (the template indexes by string keys for cells).
+// railSection feeds the per-(rail, kind family) sub-table in the
+// report. Nodes is the deterministic axis ordering; Table is a flat
+// node→node→*result map (the template indexes by string keys for
+// cells). Family is one of "rping" / "ibbw" — drives the section
+// header and the cell-formatter dispatch.
 type railSection struct {
-	Rail  string
-	Nodes []string
-	Table map[string]map[string]*PingResult
+	Rail   string
+	Family string
+	Nodes  []string
+	Table  map[string]map[string]*PingResult
+}
+
+// crossRailSection groups cross-rail canary results by kind family
+// for the cross-rail list at the bottom of the matrix section.
+type crossRailSection struct {
+	Family  string
+	Results []PingResult
+}
+
+// htmlFamilyOf maps a PingTestKind to the family identifier used by
+// the HTML template's funcs (kept as a separate string from
+// text_report.go's kindFamily enum to keep package boundaries clean).
+func htmlFamilyOf(k PingTestKind) string {
+	if k.IsRDMABw() {
+		return "ibbw"
+	}
+	return "rping"
+}
+
+// htmlFamilyRank gives a stable ordering of families in the rendered
+// report — rping (QP establishment) before ib_write_bw (bandwidth).
+func htmlFamilyRank(fam string) int {
+	if fam == "ibbw" {
+		return 1
+	}
+	return 0
 }
 
 // buildRailTable indexes a slice of same-rail PingResults by source
diff --git a/pkg/networkoperatorplugin/connectivity/report.html.tmpl b/pkg/networkoperatorplugin/connectivity/report.html.tmpl
index fcf9368..cb8d958 100644
--- a/pkg/networkoperatorplugin/connectivity/report.html.tmpl
+++ b/pkg/networkoperatorplugin/connectivity/report.html.tmpl
@@ -123,6 +123,36 @@ main {
   padding: 32px 40px 48px;
 }
 
+.verdict-banner {
+  border-radius: 8px;
+  padding: 20px 24px;
+  margin-bottom: 28px;
+  border: 1px solid;
+}
+.verdict-banner.verdict-pass {
+  background: var(--success-bg);
+  border-color: var(--success-bd);
+  color: var(--success);
+}
+.verdict-banner.verdict-fail {
+  background: var(--error-bg);
+  border-color: var(--error-bd);
+  color: var(--error);
+}
+.verdict-headline {
+  font-size: 18px;
+  font-weight: 700;
+  letter-spacing: 0.4px;
+}
+.verdict-list {
+  margin-top: 10px;
+  padding-left: 20px;
+  font-size: 13.5px;
+  color: var(--text);
+}
+.verdict-list li { margin-top: 2px; }
+.verdict-notes { color: var(--text-muted); }
+
 section {
   background: var(--bg);
   border: 1px solid var(--border);
@@ -324,6 +354,26 @@ details table th, details table td { padding: 6px 10px; }
 }
 .matrix-rail code { font-size: 12px; color: var(--text); }
 
+/* Colour-coded headers for the paired Actual / Expected PF
+ * sub-tables. Actual = red ("this is what the cluster has;
+ * the highlighted rows are wrong"). Expected = orange ("this
+ * is what the certified topology says; the highlighted rows
+ * are what's missing or mismatched"). */
+.matrix-rail.pf-section-actual {
+  color: #991B1B;                            /* red-800 */
+  background: rgba(220, 38, 38, 0.10);
+  border-left: 3px solid #DC2626;            /* red-600 */
+  padding: 6px 10px;
+  border-radius: 4px;
+}
+.matrix-rail.pf-section-expected {
+  color: #9A3412;                            /* orange-800 */
+  background: rgba(234, 88, 12, 0.10);
+  border-left: 3px solid #EA580C;            /* orange-600 */
+  padding: 6px 10px;
+  border-radius: 4px;
+}
+
 .summary-line {
   font-size: 14px;
   color: var(--text);
@@ -372,6 +422,35 @@ details table th, details table td { padding: 6px 10px; }
 }
 .muted { color: var(--text-muted); }
 
+/* Platform-topology diff styling. The PF section renders paired
+ * Actual / Expected sub-tables; rows that don't match their
+ * counterpart in the other table are tinted amber. The reader
+ * scans down both tables and the divergence is obvious without
+ * inline annotations. */
+.mismatch-inline {
+  color: #B45309;       /* amber-700, contrast against the row */
+  font-size: 11px;
+  margin-left: 6px;
+}
+/* Mismatched rows take their tint from the table they're in,
+ * matching the section header: red in Actual, orange in
+ * Expected. The adjacent-sibling combinator targets the <table>
+ * that immediately follows each section header div. */
+.matrix-rail.pf-section-actual + table tr.pf-mismatched {
+  background: rgba(220, 38, 38, 0.12);
+}
+.matrix-rail.pf-section-actual + table tr.pf-mismatched td {
+  border-top: 1px solid rgba(220, 38, 38, 0.32);
+  border-bottom: 1px solid rgba(220, 38, 38, 0.32);
+}
+.matrix-rail.pf-section-expected + table tr.pf-mismatched {
+  background: rgba(234, 88, 12, 0.12);
+}
+.matrix-rail.pf-section-expected + table tr.pf-mismatched td {
+  border-top: 1px solid rgba(234, 88, 12, 0.32);
+  border-bottom: 1px solid rgba(234, 88, 12, 0.32);
+}
+
 .warning-list {
   list-style: none;
   padding-left: 0;
@@ -419,6 +498,26 @@ footer strong { color: var(--text-muted); }
 </header>
 <main>
 
+<div class="verdict-banner verdict-{{if .Verdict.Pass}}pass{{else}}fail{{end}}">
+  <div class="verdict-headline">
+    {{if .Verdict.Pass}}✓ VALIDATION PASSED{{else}}✗ VALIDATION FAILED{{end}}
+  </div>
+  {{- if .Verdict.Reasons}}
+  <ul class="verdict-list verdict-reasons">
+    {{- range .Verdict.Reasons}}
+    <li>{{.}}</li>
+    {{- end}}
+  </ul>
+  {{- end}}
+  {{- if .Verdict.Notes}}
+  <ul class="verdict-list verdict-notes">
+    {{- range .Verdict.Notes}}
+    <li>{{.}}</li>
+    {{- end}}
+  </ul>
+  {{- end}}
+</div>
+
 <section>
   <h2>Profile</h2>
   <div class="body">
@@ -436,6 +535,36 @@ footer strong { color: var(--text-muted); }
 <section>
   <h2>Node groups ({{len .NodeGroups}})</h2>
   <div class="body">
+    {{- if .PresetMatches}}
+    <div class="matrix-rail">Platform topology validation</div>
+    <table>
+      <thead>
+        <tr><th>Group</th><th>Machine</th><th>GPU</th><th>Status</th><th>Detail</th></tr>
+      </thead>
+      <tbody>
+        {{- range .PresetMatches}}
+        <tr class="{{presetStatusClass .Status}}">
+          <td><code>{{or .Group "—"}}</code></td>
+          <td>{{or .MachineType "—"}}</td>
+          <td>{{or .GPUType "—"}}</td>
+          <td><span class="badge {{presetStatusClass .Status}}">{{presetStatusLabel .Status}}</span></td>
+          <td>
+            {{- if eq (printf "%s" .Status) "match"}}
+            matches certified topology for <code>{{.PresetName}}</code> server type
+            {{- else if eq (printf "%s" .Status) "deviation"}}
+            does not match certified topology for <code>{{.PresetName}}</code> server type · {{.Reason}}
+            {{- else if eq (printf "%s" .Status) "not-found"}}
+            no certified topology available for this server type · {{.Reason}}
+            {{- else}}
+            {{.Reason}}
+            {{- end}}
+          </td>
+        </tr>
+        {{- end}}
+      </tbody>
+    </table>
+    {{- end}}
+
     {{- if .NodeGroups}}
     {{- range .NodeGroups}}
     <div class="nodegroup">
@@ -466,17 +595,39 @@ footer strong { color: var(--text-muted); }
           {{- if and (not .SriovCapable) (not .RdmaCapable) (not .IbCapable)}}<span class="muted">none</span>{{end}}
         </dd>
         {{- if .PresetApplied}}
-        <dt>Preset</dt><dd>applied</dd>
+        <dt>Certified topology</dt><dd>applied</dd>
         {{- end}}
       </dl>
 
       {{- if .EastWestPFs}}
-      <div class="matrix-rail">East-west PFs ({{len .EastWestPFs}})</div>
+      <div class="matrix-rail pf-section-actual">East-west PFs — Actual ({{if .PFCountMismatch}}{{.PFCountMismatch.Got}} <span class="mismatch-inline">certified topology expects {{.PFCountMismatch.Expected}}</span>{{else}}{{len .EastWestPFs}}{{end}})</div>
       <table>
         <thead><tr><th>PCI</th><th>Device ID</th><th>Rail</th><th>Netdev</th><th>RDMA</th><th>PSID</th><th>Part #</th><th>NUMA</th><th>GPU</th></tr></thead>
         <tbody>
           {{- range .EastWestPFs}}
-          <tr>
+          <tr{{if .Mismatched}} class="pf-mismatched"{{end}}>
+            <td><code>{{.PciAddress}}</code></td>
+            <td><code>{{or .DeviceID "—"}}</code></td>
+            <td>{{.Rail}}</td>
+            <td><code>{{or .NetworkInterface "—"}}</code></td>
+            <td><code>{{or .RdmaDevice "—"}}</code></td>
+            <td>{{or .PSID "—"}}</td>
+            <td>{{or .PartNumber "—"}}</td>
+            <td>{{or .NumaNode "—"}}</td>
+            <td>{{or .ConnectedGPU "—"}}{{if .GPUProximity}} ({{.GPUProximity}}){{end}}</td>
+          </tr>
+          {{- end}}
+        </tbody>
+      </table>
+      {{- end}}
+
+      {{- if .ExpectedEastWestPFs}}
+      <div class="matrix-rail pf-section-expected">East-west PFs — Expected by certified topology ({{len .ExpectedEastWestPFs}})</div>
+      <table>
+        <thead><tr><th>PCI</th><th>Device ID</th><th>Rail</th><th>Netdev</th><th>RDMA</th><th>PSID</th><th>Part #</th><th>NUMA</th><th>GPU</th></tr></thead>
+        <tbody>
+          {{- range .ExpectedEastWestPFs}}
+          <tr{{if .Mismatched}} class="pf-mismatched"{{end}}>
             <td><code>{{.PciAddress}}</code></td>
             <td><code>{{or .DeviceID "—"}}</code></td>
             <td>{{.Rail}}</td>
@@ -493,12 +644,12 @@ footer strong { color: var(--text-muted); }
       {{- end}}
 
       {{- if .NorthSouthPFs}}
-      <div class="matrix-rail">North-south PFs ({{len .NorthSouthPFs}})</div>
+      <div class="matrix-rail pf-section-actual">North-south PFs — Actual ({{len .NorthSouthPFs}})</div>
       <table>
         <thead><tr><th>PCI</th><th>Device ID</th><th>Netdev</th><th>RDMA</th><th>PSID</th><th>Part #</th></tr></thead>
         <tbody>
           {{- range .NorthSouthPFs}}
-          <tr>
+          <tr{{if .Mismatched}} class="pf-mismatched"{{end}}>
             <td><code>{{.PciAddress}}</code></td>
             <td><code>{{or .DeviceID "—"}}</code></td>
             <td><code>{{or .NetworkInterface "—"}}</code></td>
@@ -511,13 +662,20 @@ footer strong { color: var(--text-muted); }
       </table>
       {{- end}}
 
-      {{- if .PresetDeviations}}
-      <div class="matrix-rail">Preset deviations ({{len .PresetDeviations}})</div>
+      {{- if .ExpectedNorthSouthPFs}}
+      <div class="matrix-rail pf-section-expected">North-south PFs — Expected by certified topology ({{len .ExpectedNorthSouthPFs}})</div>
       <table>
-        <thead><tr><th>Field</th><th>Expected</th><th>Got</th><th>Detail</th></tr></thead>
+        <thead><tr><th>PCI</th><th>Device ID</th><th>Netdev</th><th>RDMA</th><th>PSID</th><th>Part #</th></tr></thead>
         <tbody>
-          {{- range .PresetDeviations}}
-          <tr><td>{{.Field}}</td><td><code>{{or .Expected "—"}}</code></td><td><code>{{or .Got "—"}}</code></td><td>{{.Detail}}</td></tr>
+          {{- range .ExpectedNorthSouthPFs}}
+          <tr{{if .Mismatched}} class="pf-mismatched"{{end}}>
+            <td><code>{{.PciAddress}}</code></td>
+            <td><code>{{or .DeviceID "—"}}</code></td>
+            <td><code>{{or .NetworkInterface "—"}}</code></td>
+            <td><code>{{or .RdmaDevice "—"}}</code></td>
+            <td>{{or .PSID "—"}}</td>
+            <td>{{or .PartNumber "—"}}</td>
+          </tr>
           {{- end}}
         </tbody>
       </table>
@@ -681,12 +839,12 @@ footer strong { color: var(--text-muted); }
       <p class="summary-line">
         <strong>{{.Matrix.Summary.Passed}}</strong>/{{.Matrix.Summary.TotalTests}} passed
         {{- if gt .Matrix.Summary.Failed 0}} · <strong>{{.Matrix.Summary.Failed}}</strong> failed{{end}}
-        {{- if gt .Matrix.Summary.ExecErrors 0}} · <strong>{{.Matrix.Summary.ExecErrors}}</strong> exec error(s){{end}}
       </p>
 
       {{- $rails := matrixByRail .Matrix.PingResults}}
       {{- range $rails}}
-        <div class="matrix-rail">Rail <code>{{.Rail}}</code></div>
+        {{- $fam := .Family}}
+        <div class="matrix-rail">Rail <code>{{.Rail}}</code> — {{familyTitle $fam}}</div>
         <table class="matrix-table">
           <thead>
             <tr>
@@ -706,7 +864,7 @@ footer strong { color: var(--text-muted); }
                 <td class="cell-self">—</td>
                 {{- else}}
                   {{- $cell := index (index $rs.Table $src) $dst}}
-                  <td class="{{cellClass $cell}}">{{cellText $cell}}</td>
+                  <td class="{{cellClass $cell}}">{{cellText $cell $fam}}</td>
                 {{- end}}
               {{- end}}
             </tr>
@@ -715,21 +873,22 @@ footer strong { color: var(--text-muted); }
         </table>
       {{- end}}
 
-      {{- $cross := matrixCrossRail .Matrix.PingResults}}
-      {{- if $cross}}
-        <div class="matrix-rail">Cross-rail canary</div>
+      {{- $crossSections := matrixCrossRail .Matrix.PingResults}}
+      {{- range $crossSections}}
+        {{- $fam := .Family}}
+        <div class="matrix-rail">Cross-rail canary — {{familyTitle $fam}}</div>
         <table class="matrix-table">
           <thead>
             <tr><th>Source</th><th>Src rail</th><th>Destination</th><th>Dst rail</th><th>Result</th></tr>
           </thead>
           <tbody>
-            {{- range $cross}}
+            {{- range .Results}}
             <tr>
               <td>{{srcLabel .Test}}</td>
               <td><code>{{.Test.SrcRail}}</code></td>
               <td>{{dstLabel .Test}}</td>
               <td><code>{{.Test.DstRail}}</code></td>
-              <td class="{{cellClass .}}">{{cellText .}}</td>
+              <td class="{{cellClass .}}">{{cellText . $fam}}</td>
             </tr>
             {{- end}}
           </tbody>
diff --git a/pkg/networkoperatorplugin/connectivity/report_test.go b/pkg/networkoperatorplugin/connectivity/report_test.go
index 55c5201..b2542bb 100644
--- a/pkg/networkoperatorplugin/connectivity/report_test.go
+++ b/pkg/networkoperatorplugin/connectivity/report_test.go
@@ -24,8 +24,10 @@ import (
 	"testing"
 	"time"
 
+	"github.com/nvidia/k8s-launch-kit/pkg/config"
 	"github.com/nvidia/k8s-launch-kit/pkg/networkoperatorplugin"
 	"github.com/nvidia/k8s-launch-kit/pkg/networkoperatorplugin/crstate"
+	"github.com/nvidia/k8s-launch-kit/pkg/presetmatch"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@@ -33,7 +35,7 @@ import (
 // updateGolden re-writes the checked-in golden file when set. Run with
 // `go test ./pkg/networkoperatorplugin/connectivity/ -update-golden`
 // after intentional template changes.
-var updateGolden = flag.Bool("update-golden", false, "rewrite testdata/verify-report.golden.html with the current renderer output")
+var updateGolden = flag.Bool("update-golden", false, "rewrite testdata/k8s-launch-kit-validation-report.golden.html with the current renderer output")
 
 // fixtureData is the deterministic input the renderer is exercised
 // against. Includes one of every state badge, an IN-PROGRESS row with
@@ -42,6 +44,13 @@ var updateGolden = flag.Bool("update-golden", false, "rewrite testdata/verify-re
 // surface.
 func fixtureData() ReportData {
 	return ReportData{
+		Verdict: OverallVerdict{
+			Pass: false,
+			Reasons: []string{
+				"1 RDMA test(s) failed in the connectivity matrix",
+				`The detected platform topology does not match the certified topology for ACME-vendor-a-h200 server type (see Node groups section for the per-device diff)`,
+			},
+		},
 		Cluster: ClusterInfo{
 			L8kVersion:        "v0.0.0-test",
 			GeneratedAt:       time.Date(2026, 5, 23, 11, 30, 0, 0, time.UTC),
@@ -65,14 +74,43 @@ func fixtureData() ReportData {
 				NodeSelector: map[string]string{"nvidia.kubernetes-launch-kit.machine": "vendor-a-h200"},
 				WorkerNodes:  []string{"node-a", "node-b"},
 				SriovCapable: true, RdmaCapable: true,
+				// Actual = discovered hardware.
 				EastWestPFs: []PFInfo{
+					// deviceID drift from the certified topology
+					// at this PCI — row tinted.
 					{PciAddress: "0000:08:00.0", DeviceID: "1021", Rail: "0", Traffic: "east-west",
-						NetworkInterface: "eth_r0", RdmaDevice: "rdma_r0", PartNumber: "MCX713106AE", NumaNode: "0", ConnectedGPU: "0"},
-					{PciAddress: "0000:08:00.1", DeviceID: "1021", Rail: "1", Traffic: "east-west",
+						NetworkInterface: "eth_r0", RdmaDevice: "rdma_r0", PartNumber: "MCX713106AE", NumaNode: "0", ConnectedGPU: "0",
+						Mismatched: true},
+					// Matches the certified topology — clean row.
+					{PciAddress: "0000:08:00.1", DeviceID: "1023", Rail: "1", Traffic: "east-west",
+						NetworkInterface: "eth_r1", RdmaDevice: "rdma_r1", PartNumber: "MCX713106AE", NumaNode: "0", ConnectedGPU: "1"},
+					// Cluster has this PCI but the certified
+					// topology doesn't — row tinted.
+					{PciAddress: "0000:08:00.2", DeviceID: "1023", Rail: "2", Traffic: "east-west",
+						NetworkInterface: "eth_r2", RdmaDevice: "rdma_r2", PartNumber: "MCX713106AE", NumaNode: "0", ConnectedGPU: "2",
+						Mismatched: true},
+				},
+				// Expected = certified topology from the matched preset.
+				ExpectedEastWestPFs: []PFInfo{
+					// Same PCI as Actual[0] but the expected
+					// deviceID — tinted in this table too because
+					// the actual deviceID drifts.
+					{PciAddress: "0000:08:00.0", DeviceID: "1023", Rail: "0", Traffic: "east-west",
+						NetworkInterface: "eth_r0", RdmaDevice: "rdma_r0", PartNumber: "MCX713106AE", NumaNode: "0", ConnectedGPU: "0",
+						Mismatched: true},
+					// Matches the cluster — clean row.
+					{PciAddress: "0000:08:00.1", DeviceID: "1023", Rail: "1", Traffic: "east-west",
 						NetworkInterface: "eth_r1", RdmaDevice: "rdma_r1", PartNumber: "MCX713106AE", NumaNode: "0", ConnectedGPU: "1"},
+					// Certified topology expects this PCI but the
+					// cluster doesn't have it — tinted.
+					{PciAddress: "0000:08:00.3", DeviceID: "1023", Rail: "3", Traffic: "east-west",
+						NetworkInterface: "eth_r3", RdmaDevice: "rdma_r3", PartNumber: "MCX713106AE", NumaNode: "0", ConnectedGPU: "3",
+						Mismatched: true},
 				},
 				PresetDeviations: []PresetDeviation{
-					{Field: "deviceID", Expected: "1023", Got: "1021", Detail: "expected ConnectX-8 (1023), found ConnectX-7 (1021)"},
+					{Field: "deviceID", Expected: "1023@0000:08:00.0", Got: "1021@0000:08:00.0", Detail: "device ID at PCI address differs from preset"},
+					{Field: "pciAddress", Got: "0000:08:00.2", Detail: "discovered PCI address not present in preset"},
+					{Field: "pciAddress", Expected: "0000:08:00.3", Detail: "preset PCI address not present on discovered hardware"},
 				},
 				PresetApplied: true,
 			},
@@ -142,47 +180,84 @@ func fixtureData() ReportData {
 		},
 		Matrix: &MatrixResult{
 			PingResults: []PingResult{
+				// rping: same-rail, both directions, rail-0 (both pass)
 				{
-					Test: PingTest{Kind: PingSameRail,
+					Test: PingTest{Kind: RDMAPingSameRail,
 						SrcNode: "node-a", DstNode: "node-b",
 						Rail: "rail-0", SrcRail: "rail-0", DstRail: "rail-0"},
-					OK: true, PacketLoss: 0, RTTAvgMs: 0.12,
+					OK: true,
 				},
 				{
-					Test: PingTest{Kind: PingSameRail,
+					Test: PingTest{Kind: RDMAPingSameRail,
 						SrcNode: "node-b", DstNode: "node-a",
 						Rail: "rail-0", SrcRail: "rail-0", DstRail: "rail-0"},
-					OK: true, PacketLoss: 0, RTTAvgMs: 0.15,
+					OK: true,
 				},
+				// rping: same-rail, rail-1 (one direction fails)
 				{
-					Test: PingTest{Kind: PingSameRail,
+					Test: PingTest{Kind: RDMAPingSameRail,
 						SrcNode: "node-a", DstNode: "node-b",
 						Rail: "rail-1", SrcRail: "rail-1", DstRail: "rail-1"},
-					OK: true, PacketLoss: 0, RTTAvgMs: 0.18,
+					OK: true,
 				},
 				{
-					Test: PingTest{Kind: PingSameRail,
+					Test: PingTest{Kind: RDMAPingSameRail,
 						SrcNode: "node-b", DstNode: "node-a",
 						Rail: "rail-1", SrcRail: "rail-1", DstRail: "rail-1"},
-					OK: false, PacketLoss: 100,
+					OK: false,
+				},
+				// rping: cross-rail canary
+				{
+					Test: PingTest{Kind: RDMAPingCrossRail,
+						SrcNode: "node-a", DstNode: "node-b",
+						Rail: "rail-0→rail-1", SrcRail: "rail-0", DstRail: "rail-1"},
+					OK: true,
+				},
+				// ib_write_bw: same-rail rail-0 both directions pass with bandwidth
+				{
+					Test: PingTest{Kind: RDMABwSameRail,
+						SrcNode: "node-a", DstNode: "node-b",
+						Rail: "rail-0", SrcRail: "rail-0", DstRail: "rail-0"},
+					OK: true, BandwidthGbps: 194.4,
+				},
+				{
+					Test: PingTest{Kind: RDMABwSameRail,
+						SrcNode: "node-b", DstNode: "node-a",
+						Rail: "rail-0", SrcRail: "rail-0", DstRail: "rail-0"},
+					OK: true, BandwidthGbps: 193.8,
 				},
+				// ib_write_bw: cross-rail canary passes with bandwidth
 				{
-					Test: PingTest{Kind: PingCrossRail,
+					Test: PingTest{Kind: RDMABwCrossRail,
 						SrcNode: "node-a", DstNode: "node-b",
 						Rail: "rail-0→rail-1", SrcRail: "rail-0", DstRail: "rail-1"},
-					OK: true, PacketLoss: 0, RTTAvgMs: 0.20,
+					OK: true, BandwidthGbps: 187.6,
 				},
 			},
-			Summary: MatrixSummary{TotalTests: 5, Passed: 4, Failed: 1},
+			Summary: MatrixSummary{TotalTests: 8, Passed: 7, Failed: 1},
 		},
 		Warnings: []string{
 			"SriovNetworkNodePolicy/ethernet-sriov-rail-0 is in-progress on 1/2 nodes — re-run later.",
 			"IPPool/rail-0-pool not found — l8k generate was not run before validate.",
 		},
+		PresetMatches: []presetmatch.Result{
+			{
+				Group:        "vendor-a-h200",
+				MachineType:  "vendor-a",
+				GPUType:      "h200",
+				Manufacturer: "ACME",
+				Status:       presetmatch.StatusDeviation,
+				PresetName:   "vendor-a/h200",
+				Reason:       "1 deviation(s) from matched preset",
+				Deviations: []config.PresetDeviationEntry{
+					{Field: "deviceID", Expected: "1023", Got: "1021", Detail: "expected ConnectX-8 (1023), found ConnectX-7 (1021)"},
+				},
+			},
+		},
 	}
 }
 
-const goldenPath = "testdata/verify-report.golden.html"
+const goldenPath = "testdata/k8s-launch-kit-validation-report.golden.html"
 
 func TestRenderHTML_Golden(t *testing.T) {
 	var buf bytes.Buffer
diff --git a/pkg/networkoperatorplugin/connectivity/result.go b/pkg/networkoperatorplugin/connectivity/result.go
new file mode 100644
index 0000000..5b5d757
--- /dev/null
+++ b/pkg/networkoperatorplugin/connectivity/result.go
@@ -0,0 +1,38 @@
+// Copyright 2026 NVIDIA CORPORATION & AFFILIATES
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package connectivity
+
+// PingResult carries the outcome of one src→dst matrix test. The
+// matrix currently runs rping and ib_write_bw — fields specific to a
+// kind stay zero for tests of the other kind:
+//
+//   - rping: OK + Err + Stdout/Stderr; BandwidthGbps stays at 0.
+//   - ib_write_bw: OK + Err + Stdout/Stderr + BandwidthGbps +
+//     MsgRateMpps populated by parseIbWriteBwOutput.
+//
+// The struct name is kept for backwards compatibility with the
+// historical ICMP-first pipeline; the matrix is RDMA-only now but
+// callers and the JSON schema continue to reference PingResult.
+type PingResult struct {
+	Test          PingTest
+	OK            bool
+	BandwidthGbps float64 // ib_write_bw only; 0 when n/a
+	MsgRateMpps   float64 // ib_write_bw only; 0 when n/a
+	Stdout        string
+	Stderr        string
+	Err           error
+}
diff --git a/pkg/networkoperatorplugin/connectivity/testdata/verify-report.golden.html b/pkg/networkoperatorplugin/connectivity/testdata/k8s-launch-kit-validation-report.golden.html
similarity index 74%
rename from pkg/networkoperatorplugin/connectivity/testdata/verify-report.golden.html
rename to pkg/networkoperatorplugin/connectivity/testdata/k8s-launch-kit-validation-report.golden.html
index a294c7f..2001f5d 100644
--- a/pkg/networkoperatorplugin/connectivity/testdata/verify-report.golden.html
+++ b/pkg/networkoperatorplugin/connectivity/testdata/k8s-launch-kit-validation-report.golden.html
@@ -104,6 +104,36 @@
   padding: 32px 40px 48px;
 }
 
+.verdict-banner {
+  border-radius: 8px;
+  padding: 20px 24px;
+  margin-bottom: 28px;
+  border: 1px solid;
+}
+.verdict-banner.verdict-pass {
+  background: var(--success-bg);
+  border-color: var(--success-bd);
+  color: var(--success);
+}
+.verdict-banner.verdict-fail {
+  background: var(--error-bg);
+  border-color: var(--error-bd);
+  color: var(--error);
+}
+.verdict-headline {
+  font-size: 18px;
+  font-weight: 700;
+  letter-spacing: 0.4px;
+}
+.verdict-list {
+  margin-top: 10px;
+  padding-left: 20px;
+  font-size: 13.5px;
+  color: var(--text);
+}
+.verdict-list li { margin-top: 2px; }
+.verdict-notes { color: var(--text-muted); }
+
 section {
   background: var(--bg);
   border: 1px solid var(--border);
@@ -297,6 +327,22 @@
 }
 .matrix-rail code { font-size: 12px; color: var(--text); }
 
+ 
+.matrix-rail.pf-section-actual {
+  color: #991B1B;                             
+  background: rgba(220, 38, 38, 0.10);
+  border-left: 3px solid #DC2626;             
+  padding: 6px 10px;
+  border-radius: 4px;
+}
+.matrix-rail.pf-section-expected {
+  color: #9A3412;                             
+  background: rgba(234, 88, 12, 0.10);
+  border-left: 3px solid #EA580C;             
+  padding: 6px 10px;
+  border-radius: 4px;
+}
+
 .summary-line {
   font-size: 14px;
   color: var(--text);
@@ -345,6 +391,28 @@
 }
 .muted { color: var(--text-muted); }
 
+ 
+.mismatch-inline {
+  color: #B45309;        
+  font-size: 11px;
+  margin-left: 6px;
+}
+ 
+.matrix-rail.pf-section-actual + table tr.pf-mismatched {
+  background: rgba(220, 38, 38, 0.12);
+}
+.matrix-rail.pf-section-actual + table tr.pf-mismatched td {
+  border-top: 1px solid rgba(220, 38, 38, 0.32);
+  border-bottom: 1px solid rgba(220, 38, 38, 0.32);
+}
+.matrix-rail.pf-section-expected + table tr.pf-mismatched {
+  background: rgba(234, 88, 12, 0.12);
+}
+.matrix-rail.pf-section-expected + table tr.pf-mismatched td {
+  border-top: 1px solid rgba(234, 88, 12, 0.32);
+  border-bottom: 1px solid rgba(234, 88, 12, 0.32);
+}
+
 .warning-list {
   list-style: none;
   padding-left: 0;
@@ -390,6 +458,16 @@ <h1>Validation Report</h1>
 </header>
 <main>
 
+<div class="verdict-banner verdict-fail">
+  <div class="verdict-headline">
+    ✗ VALIDATION FAILED
+  </div>
+  <ul class="verdict-list verdict-reasons">
+    <li>1 RDMA test(s) failed in the connectivity matrix</li>
+    <li>The detected platform topology does not match the certified topology for ACME-vendor-a-h200 server type (see Node groups section for the per-device diff)</li>
+  </ul>
+</div>
+
 <section>
   <h2>Profile</h2>
   <div class="body">
@@ -404,6 +482,23 @@ <h2>Profile</h2>
 <section>
   <h2>Node groups (1)</h2>
   <div class="body">
+    <div class="matrix-rail">Platform topology validation</div>
+    <table>
+      <thead>
+        <tr><th>Group</th><th>Machine</th><th>GPU</th><th>Status</th><th>Detail</th></tr>
+      </thead>
+      <tbody>
+        <tr class="state-inprogress">
+          <td><code>vendor-a-h200</code></td>
+          <td>vendor-a</td>
+          <td>h200</td>
+          <td><span class="badge state-inprogress">MISMATCH</span></td>
+          <td>
+            does not match certified topology for <code>vendor-a/h200</code> server type · 1 deviation(s) from matched preset
+          </td>
+        </tr>
+      </tbody>
+    </table>
     <div class="nodegroup">
       <div class="nodegroup-head">
         <code>vendor-a-h200</code> · <span class="muted">vendor-a</span> · <span class="muted">h200</span> · <span class="muted">Ethernet</span>
@@ -418,13 +513,13 @@ <h2>Node groups (1)</h2>
         <dt>Capabilities</dt>
         <dd><span class="cap-pill">sriov</span><span class="cap-pill">rdma</span>
         </dd>
-        <dt>Preset</dt><dd>applied</dd>
+        <dt>Certified topology</dt><dd>applied</dd>
       </dl>
-      <div class="matrix-rail">East-west PFs (2)</div>
+      <div class="matrix-rail pf-section-actual">East-west PFs — Actual (3)</div>
       <table>
         <thead><tr><th>PCI</th><th>Device ID</th><th>Rail</th><th>Netdev</th><th>RDMA</th><th>PSID</th><th>Part #</th><th>NUMA</th><th>GPU</th></tr></thead>
         <tbody>
-          <tr>
+          <tr class="pf-mismatched">
             <td><code>0000:08:00.0</code></td>
             <td><code>1021</code></td>
             <td>0</td>
@@ -437,7 +532,7 @@ <h2>Node groups (1)</h2>
           </tr>
           <tr>
             <td><code>0000:08:00.1</code></td>
-            <td><code>1021</code></td>
+            <td><code>1023</code></td>
             <td>1</td>
             <td><code>eth_r1</code></td>
             <td><code>rdma_r1</code></td>
@@ -446,13 +541,56 @@ <h2>Node groups (1)</h2>
             <td>0</td>
             <td>1</td>
           </tr>
+          <tr class="pf-mismatched">
+            <td><code>0000:08:00.2</code></td>
+            <td><code>1023</code></td>
+            <td>2</td>
+            <td><code>eth_r2</code></td>
+            <td><code>rdma_r2</code></td>
+            <td>—</td>
+            <td>MCX713106AE</td>
+            <td>0</td>
+            <td>2</td>
+          </tr>
         </tbody>
       </table>
-      <div class="matrix-rail">Preset deviations (1)</div>
+      <div class="matrix-rail pf-section-expected">East-west PFs — Expected by certified topology (3)</div>
       <table>
-        <thead><tr><th>Field</th><th>Expected</th><th>Got</th><th>Detail</th></tr></thead>
+        <thead><tr><th>PCI</th><th>Device ID</th><th>Rail</th><th>Netdev</th><th>RDMA</th><th>PSID</th><th>Part #</th><th>NUMA</th><th>GPU</th></tr></thead>
         <tbody>
-          <tr><td>deviceID</td><td><code>1023</code></td><td><code>1021</code></td><td>expected ConnectX-8 (1023), found ConnectX-7 (1021)</td></tr>
+          <tr class="pf-mismatched">
+            <td><code>0000:08:00.0</code></td>
+            <td><code>1023</code></td>
+            <td>0</td>
+            <td><code>eth_r0</code></td>
+            <td><code>rdma_r0</code></td>
+            <td>—</td>
+            <td>MCX713106AE</td>
+            <td>0</td>
+            <td>0</td>
+          </tr>
+          <tr>
+            <td><code>0000:08:00.1</code></td>
+            <td><code>1023</code></td>
+            <td>1</td>
+            <td><code>eth_r1</code></td>
+            <td><code>rdma_r1</code></td>
+            <td>—</td>
+            <td>MCX713106AE</td>
+            <td>0</td>
+            <td>1</td>
+          </tr>
+          <tr class="pf-mismatched">
+            <td><code>0000:08:00.3</code></td>
+            <td><code>1023</code></td>
+            <td>3</td>
+            <td><code>eth_r3</code></td>
+            <td><code>rdma_r3</code></td>
+            <td>—</td>
+            <td>MCX713106AE</td>
+            <td>0</td>
+            <td>3</td>
+          </tr>
         </tbody>
       </table>
     </div>
@@ -622,9 +760,9 @@ <h2>Manifest state (4)</h2>
   <h2>Connectivity matrix</h2>
   <div class="body">
       <p class="summary-line">
-        <strong>4</strong>/5 passed · <strong>1</strong> failed
+        <strong>7</strong>/8 passed · <strong>1</strong> failed
       </p>
-        <div class="matrix-rail">Rail <code>rail-0</code></div>
+        <div class="matrix-rail">Rail <code>rail-0</code> — RDMA ping (rping)</div>
         <table class="matrix-table">
           <thead>
             <tr>
@@ -637,16 +775,16 @@ <h2>Connectivity matrix</h2>
             <tr>
               <td>node-a</td>
                 <td class="cell-self">—</td>
-                  <td class="cell-pass">✓ 0% 0.12ms</td>
+                  <td class="cell-pass">✓</td>
             </tr>
             <tr>
               <td>node-b</td>
-                  <td class="cell-pass">✓ 0% 0.15ms</td>
+                  <td class="cell-pass">✓</td>
                 <td class="cell-self">—</td>
             </tr>
           </tbody>
         </table>
-        <div class="matrix-rail">Rail <code>rail-1</code></div>
+        <div class="matrix-rail">Rail <code>rail-0</code> — RDMA bandwidth (ib_write_bw)</div>
         <table class="matrix-table">
           <thead>
             <tr>
@@ -659,16 +797,53 @@ <h2>Connectivity matrix</h2>
             <tr>
               <td>node-a</td>
                 <td class="cell-self">—</td>
-                  <td class="cell-pass">✓ 0% 0.18ms</td>
+                  <td class="cell-pass">✓ 194.4 Gbps</td>
             </tr>
             <tr>
               <td>node-b</td>
-                  <td class="cell-fail">✗ 100%</td>
+                  <td class="cell-pass">✓ 193.8 Gbps</td>
                 <td class="cell-self">—</td>
             </tr>
           </tbody>
         </table>
-        <div class="matrix-rail">Cross-rail canary</div>
+        <div class="matrix-rail">Rail <code>rail-1</code> — RDMA ping (rping)</div>
+        <table class="matrix-table">
+          <thead>
+            <tr>
+              <th>src \ dst</th>
+              <th>node-a</th>
+              <th>node-b</th>
+            </tr>
+          </thead>
+          <tbody>
+            <tr>
+              <td>node-a</td>
+                <td class="cell-self">—</td>
+                  <td class="cell-pass">✓</td>
+            </tr>
+            <tr>
+              <td>node-b</td>
+                  <td class="cell-fail">✗</td>
+                <td class="cell-self">—</td>
+            </tr>
+          </tbody>
+        </table>
+        <div class="matrix-rail">Cross-rail canary — RDMA ping (rping)</div>
+        <table class="matrix-table">
+          <thead>
+            <tr><th>Source</th><th>Src rail</th><th>Destination</th><th>Dst rail</th><th>Result</th></tr>
+          </thead>
+          <tbody>
+            <tr>
+              <td>node-a</td>
+              <td><code>rail-0</code></td>
+              <td>node-b</td>
+              <td><code>rail-1</code></td>
+              <td class="cell-pass">✓</td>
+            </tr>
+          </tbody>
+        </table>
+        <div class="matrix-rail">Cross-rail canary — RDMA bandwidth (ib_write_bw)</div>
         <table class="matrix-table">
           <thead>
             <tr><th>Source</th><th>Src rail</th><th>Destination</th><th>Dst rail</th><th>Result</th></tr>
@@ -679,7 +854,7 @@ <h2>Connectivity matrix</h2>
               <td><code>rail-0</code></td>
               <td>node-b</td>
               <td><code>rail-1</code></td>
-              <td class="cell-pass">✓ 0% 0.20ms</td>
+              <td class="cell-pass">✓ 187.6 Gbps</td>
             </tr>
           </tbody>
         </table>
diff --git a/pkg/networkoperatorplugin/connectivity/text_report.go b/pkg/networkoperatorplugin/connectivity/text_report.go
index c479047..84b9177 100644
--- a/pkg/networkoperatorplugin/connectivity/text_report.go
+++ b/pkg/networkoperatorplugin/connectivity/text_report.go
@@ -33,12 +33,11 @@ import (
 // structured MatrixResult that the validate CLI marshals to stdout.
 //
 // Cells:
-//   - "—"      src equals dst (self-ping, not run)
+//   - "—"      src equals dst (self-test, not run)
 //   - "·"      no test for this (src,dst,rail) — shared-rail set
 //     didn't include both endpoints
-//   - "✓ 0%"   ping passed; packet loss percentage when non-zero,
-//     RTT when present (e.g. "✓ 0% 0.5ms")
-//   - "✗ 100%" ping failed; "✗ ERR" if the exec itself errored
+//   - rping       — "✓" / "✗" / "✗ ERR"
+//   - ib_write_bw — "✓ 194.4 Gbps" / "✗ ERR"
 //
 // ANSI color is applied only when uiOutput.IsTTY() — keeps log files
 // and CI pipelines free of escape sequences.
@@ -51,34 +50,75 @@ func RenderMatrixText(uiOutput ui.Output, result *MatrixResult) {
 	}
 
 	tty := uiOutput.IsTTY()
-	byRail, crossRail, nodesSorted, railsSorted := groupResults(result.PingResults)
+	// Group per (rail, kind family). One grid is rendered for
+	// every (rail, family) bucket that has at least one result, in
+	// the test-execution order so the reader sees rping
+	// (QP-establishment canary) before ib_write_bw (bandwidth).
+	byRail, byCross, nodes, rails := groupResultsByKind(result.PingResults)
 
-	for _, rail := range railsSorted {
-		uiOutput.Info("")
-		uiOutput.Info("Rail %s:", rail)
-		for _, line := range renderRailGrid(nodesSorted, byRail[rail], tty) {
-			uiOutput.Info("%s", line)
+	families := []kindFamily{familyRPing, familyIbBw}
+	for _, rail := range rails {
+		for _, fam := range families {
+			grid, ok := byRail[rail][fam]
+			if !ok {
+				continue
+			}
+			uiOutput.Info("")
+			uiOutput.Info("Rail %s — %s:", rail, familyTitle(fam))
+			for _, line := range renderRailGrid(nodes, grid, fam, tty) {
+				uiOutput.Info("%s", line)
+			}
 		}
 	}
 
-	if len(crossRail) > 0 {
+	for _, fam := range families {
+		cross, ok := byCross[fam]
+		if !ok || len(cross) == 0 {
+			continue
+		}
 		uiOutput.Info("")
-		uiOutput.Info("Cross-rail canary:")
-		for _, line := range renderCrossRailList(crossRail, tty) {
+		uiOutput.Info("Cross-rail canary — %s:", familyTitle(fam))
+		for _, line := range renderCrossRailList(cross, fam, tty) {
 			uiOutput.Info("%s", line)
 		}
 	}
 }
 
-// groupResults indexes ping results by rail and source node for the
-// per-rail grid, plus a flat slice for the cross-rail canary. Returns
-// the deterministic node and rail orderings so callers don't have to
-// sort again. Node names are used as the axis labels because they're
-// what operators recognize ("worker-03" vs the DaemonSet-generated
-// pod suffix like "sriov-test-7t8h9"); the underlying pod name is
-// still carried on the PingTest for the SPDY exec path.
-func groupResults(results []PingResult) (byRail map[string]map[string]map[string]*PingResult, crossRail []*PingResult, nodes []string, rails []string) {
-	byRail = map[string]map[string]map[string]*PingResult{}
+// kindFamily collapses the four PingTestKind values down to the two
+// families the renderer cares about: rping and ib_write_bw. The
+// same-rail / cross-rail axis is handled separately (per-rail grid vs
+// cross-rail list).
+type kindFamily int
+
+const (
+	familyRPing kindFamily = iota
+	familyIbBw
+)
+
+func kindFamilyOf(k PingTestKind) kindFamily {
+	if k.IsRDMABw() {
+		return familyIbBw
+	}
+	return familyRPing
+}
+
+func familyTitle(f kindFamily) string {
+	if f == familyIbBw {
+		return "RDMA bandwidth (ib_write_bw)"
+	}
+	return "RDMA ping (rping)"
+}
+
+// groupResultsByKind indexes results by (rail, family, src) → dst →
+// result for the per-rail grids, and by family → []result for the
+// cross-rail canary lists. Returns the deterministic node and rail
+// orderings so callers don't have to sort again. Node names are used
+// as the axis labels because they're what operators recognize
+// ("worker-03" vs the DaemonSet-generated pod suffix); the underlying
+// pod name is still carried on the PingTest for the SPDY exec path.
+func groupResultsByKind(results []PingResult) (byRail map[string]map[kindFamily]map[string]map[string]*PingResult, byCross map[kindFamily][]*PingResult, nodes []string, rails []string) {
+	byRail = map[string]map[kindFamily]map[string]map[string]*PingResult{}
+	byCross = map[kindFamily][]*PingResult{}
 	nodeSet := map[string]struct{}{}
 	railSet := map[string]struct{}{}
 
@@ -88,18 +128,22 @@ func groupResults(results []PingResult) (byRail map[string]map[string]map[string
 		dst := axisLabel(r.Test.DstNode, r.Test.DstPod)
 		nodeSet[src] = struct{}{}
 		nodeSet[dst] = struct{}{}
-		if r.Test.Kind == PingCrossRail {
-			crossRail = append(crossRail, r)
+		fam := kindFamilyOf(r.Test.Kind)
+		if r.Test.Kind.IsCrossRail() {
+			byCross[fam] = append(byCross[fam], r)
 			continue
 		}
 		if _, ok := byRail[r.Test.Rail]; !ok {
-			byRail[r.Test.Rail] = map[string]map[string]*PingResult{}
+			byRail[r.Test.Rail] = map[kindFamily]map[string]map[string]*PingResult{}
 			railSet[r.Test.Rail] = struct{}{}
 		}
-		if _, ok := byRail[r.Test.Rail][src]; !ok {
-			byRail[r.Test.Rail][src] = map[string]*PingResult{}
+		if _, ok := byRail[r.Test.Rail][fam]; !ok {
+			byRail[r.Test.Rail][fam] = map[string]map[string]*PingResult{}
+		}
+		if _, ok := byRail[r.Test.Rail][fam][src]; !ok {
+			byRail[r.Test.Rail][fam][src] = map[string]*PingResult{}
 		}
-		byRail[r.Test.Rail][src][dst] = r
+		byRail[r.Test.Rail][fam][src][dst] = r
 	}
 
 	nodes = make([]string, 0, len(nodeSet))
@@ -130,7 +174,9 @@ func axisLabel(node, pod string) string {
 // renderRailGrid produces the lines for one rail's src×dst grid, with
 // columns aligned via text/tabwriter. Axis labels are node names
 // (with pod-name fallback for endpoints whose NodeName wasn't set).
-func renderRailGrid(nodes []string, table map[string]map[string]*PingResult, tty bool) []string {
+// fam selects per-kind cell formatting (ICMP shows loss + RTT,
+// rping shows ✓/✗, ib_write_bw shows ✓ N Gbps).
+func renderRailGrid(nodes []string, table map[string]map[string]*PingResult, fam kindFamily, tty bool) []string {
 	var buf bytes.Buffer
 	tw := tabwriter.NewWriter(&buf, 0, 0, 2, ' ', 0)
 
@@ -144,7 +190,7 @@ func renderRailGrid(nodes []string, table map[string]map[string]*PingResult, tty
 	for _, src := range nodes {
 		fmt.Fprintf(tw, "  %s\t", shortPodName(src))
 		for _, dst := range nodes {
-			fmt.Fprintf(tw, "%s\t", cellFor(src, dst, table[src][dst], tty))
+			fmt.Fprintf(tw, "%s\t", cellFor(src, dst, table[src][dst], fam, tty))
 		}
 		fmt.Fprintln(tw)
 	}
@@ -158,7 +204,7 @@ func renderRailGrid(nodes []string, table map[string]map[string]*PingResult, tty
 // asymmetric (srcRail, dstRail) shape neatly. Same node-name fallback
 // as the per-rail grid: prefer Pod.Spec.NodeName, fall back to the
 // pod name.
-func renderCrossRailList(results []*PingResult, tty bool) []string {
+func renderCrossRailList(results []*PingResult, fam kindFamily, tty bool) []string {
 	type row struct {
 		src, dst string
 		r        *PingResult
@@ -184,7 +230,7 @@ func renderCrossRailList(results []*PingResult, tty bool) []string {
 		left := fmt.Sprintf("  %s [%s]\t→ %s [%s]\t",
 			shortPodName(row.src), row.r.Test.SrcRail,
 			shortPodName(row.dst), row.r.Test.DstRail)
-		fmt.Fprintf(tw, "%s%s\n", left, cellDetail(row.r, tty))
+		fmt.Fprintf(tw, "%s%s\n", left, cellDetail(row.r, fam, tty))
 	}
 	tw.Flush()
 	return trailingTrim(strings.Split(buf.String(), "\n"))
@@ -193,31 +239,24 @@ func renderCrossRailList(results []*PingResult, tty bool) []string {
 // cellFor renders one src×dst grid cell. self-pairs render as "—",
 // missing pairs as "·" (the rail set didn't pair these two pods —
 // rare; e.g. one pod's multus annotation was missing this rail).
-func cellFor(src, dst string, r *PingResult, tty bool) string {
+func cellFor(src, dst string, r *PingResult, fam kindFamily, tty bool) string {
 	if src == dst {
 		return "—"
 	}
 	if r == nil {
 		return "·"
 	}
-	return cellDetail(r, tty)
+	return cellDetail(r, fam, tty)
 }
 
 // cellDetail formats a single result into its terse cell representation
-// + optional ANSI color when TTY.
-func cellDetail(r *PingResult, tty bool) string {
-	var body string
-	switch {
-	case r.OK:
-		body = fmt.Sprintf("✓ %d%%", r.PacketLoss)
-		if r.RTTAvgMs > 0 {
-			body = fmt.Sprintf("%s %.1fms", body, r.RTTAvgMs)
-		}
-	case r.PacketLoss >= 0:
-		body = fmt.Sprintf("✗ %d%%", r.PacketLoss)
-	default:
-		body = "✗ ERR"
-	}
+// + optional ANSI color when TTY. The body shape depends on the
+// kind family:
+//
+//   - rping:       ✓ / ✗
+//   - ib_write_bw: ✓ 194.4 Gbps / ✗ ERR
+func cellDetail(r *PingResult, fam kindFamily, tty bool) string {
+	body := cellBody(r, fam)
 	if tty {
 		if r.OK {
 			return "\033[32m" + body + "\033[0m"
@@ -227,6 +266,20 @@ func cellDetail(r *PingResult, tty bool) string {
 	return body
 }
 
+func cellBody(r *PingResult, fam kindFamily) string {
+	if fam == familyIbBw {
+		if r.OK && r.BandwidthGbps > 0 {
+			return fmt.Sprintf("✓ %.1f Gbps", r.BandwidthGbps)
+		}
+		return "✗ ERR"
+	}
+	// rping
+	if r.OK {
+		return "✓"
+	}
+	return "✗"
+}
+
 // shortPodName trims a long pod name to keep grid columns from blowing
 // out — DaemonSet pods often have a 5-char random suffix after a long
 // app name. We keep the leading portion + the last 5 chars (the hash)
diff --git a/pkg/networkoperatorplugin/connectivity/text_report_test.go b/pkg/networkoperatorplugin/connectivity/text_report_test.go
index f3df1a3..7c34a92 100644
--- a/pkg/networkoperatorplugin/connectivity/text_report_test.go
+++ b/pkg/networkoperatorplugin/connectivity/text_report_test.go
@@ -30,58 +30,75 @@ import (
 // the order it was emitted so tests can assert on exact lines.
 type captureOutput struct{ lines []string }
 
-func (c *captureOutput) Info(format string, args ...interface{})    { c.lines = append(c.lines, fmt.Sprintf(format, args...)) }
-func (c *captureOutput) Success(format string, args ...interface{}) { c.lines = append(c.lines, "SUCCESS: "+fmt.Sprintf(format, args...)) }
-func (c *captureOutput) Warning(format string, args ...interface{}) { c.lines = append(c.lines, "WARNING: "+fmt.Sprintf(format, args...)) }
-func (c *captureOutput) Error(format string, args ...interface{})   { c.lines = append(c.lines, "ERROR: "+fmt.Sprintf(format, args...)) }
-func (c *captureOutput) StartProgress(message string) ui.Progress   { return &captureProgress{out: c, msg: message} }
-func (c *captureOutput) Header(text string)                         {}
-func (c *captureOutput) Section(text string)                        { c.lines = append(c.lines, "SECTION: "+text) }
-func (c *captureOutput) Confirm(string) (bool, error)               { return true, nil }
-func (c *captureOutput) IsTTY() bool                                { return false }
+func (c *captureOutput) Info(format string, args ...interface{}) {
+	c.lines = append(c.lines, fmt.Sprintf(format, args...))
+}
+func (c *captureOutput) Success(format string, args ...interface{}) {
+	c.lines = append(c.lines, "SUCCESS: "+fmt.Sprintf(format, args...))
+}
+func (c *captureOutput) Warning(format string, args ...interface{}) {
+	c.lines = append(c.lines, "WARNING: "+fmt.Sprintf(format, args...))
+}
+func (c *captureOutput) Error(format string, args ...interface{}) {
+	c.lines = append(c.lines, "ERROR: "+fmt.Sprintf(format, args...))
+}
+func (c *captureOutput) StartProgress(message string) ui.Progress {
+	return &captureProgress{out: c, msg: message}
+}
+func (c *captureOutput) Header(text string)            {}
+func (c *captureOutput) Section(text string)           { c.lines = append(c.lines, "SECTION: "+text) }
+func (c *captureOutput) Confirm(string) (bool, error)  { return true, nil }
+func (c *captureOutput) IsTTY() bool                   { return false }
 
 type captureProgress struct {
 	out *captureOutput
 	msg string
 }
 
-func (p *captureProgress) Update(string)         {}
-func (p *captureProgress) Success(m string)      { p.out.lines = append(p.out.lines, "PROGRESS_OK: "+m) }
-func (p *captureProgress) Fail(m string)         { p.out.lines = append(p.out.lines, "PROGRESS_FAIL: "+m) }
+func (p *captureProgress) Update(string)    {}
+func (p *captureProgress) Success(m string) { p.out.lines = append(p.out.lines, "PROGRESS_OK: "+m) }
+func (p *captureProgress) Fail(m string)    { p.out.lines = append(p.out.lines, "PROGRESS_FAIL: "+m) }
 
-// Helpers synthesize results with both pod and node names so the
-// renderer's node-label preference is exercised.
+// Helpers synthesize RDMA results so the renderer's per-(rail, family)
+// grouping and node-label fallback are exercised.
 
-func successResult(src, dst, rail, srcIP, dstIP string, lossPercent int, rttMs float64) PingResult {
+func rpingResult(src, dst, rail string, ok bool) PingResult {
 	return PingResult{
 		Test: PingTest{
-			Kind:    PingSameRail,
+			Kind:    RDMAPingSameRail,
 			SrcPod:  src + "-pod",
 			DstPod:  dst + "-pod",
 			SrcNode: src,
 			DstNode: dst,
 			Rail:    rail,
-			SrcIP:   srcIP,
-			DstIP:   dstIP,
 			SrcRail: rail,
 			DstRail: rail,
 		},
-		OK:         lossPercent < 100,
-		PacketLoss: lossPercent,
-		RTTAvgMs:   rttMs,
+		OK: ok,
 	}
 }
 
-func failResult(src, dst, rail string, lossPercent int) PingResult {
-	r := successResult(src, dst, rail, "", "", lossPercent, 0)
-	r.OK = false
-	return r
+func ibBwResult(src, dst, rail string, ok bool, bwGbps float64) PingResult {
+	return PingResult{
+		Test: PingTest{
+			Kind:    RDMABwSameRail,
+			SrcPod:  src + "-pod",
+			DstPod:  dst + "-pod",
+			SrcNode: src,
+			DstNode: dst,
+			Rail:    rail,
+			SrcRail: rail,
+			DstRail: rail,
+		},
+		OK:            ok,
+		BandwidthGbps: bwGbps,
+	}
 }
 
-func crossResult(src, dst, srcRail, dstRail string, ok bool) PingResult {
+func crossRpingResult(src, dst, srcRail, dstRail string, ok bool) PingResult {
 	return PingResult{
 		Test: PingTest{
-			Kind:    PingCrossRail,
+			Kind:    RDMAPingCrossRail,
 			SrcPod:  src + "-pod",
 			DstPod:  dst + "-pod",
 			SrcNode: src,
@@ -90,23 +107,24 @@ func crossResult(src, dst, srcRail, dstRail string, ok bool) PingResult {
 			SrcRail: srcRail,
 			DstRail: dstRail,
 		},
-		OK:         ok,
-		PacketLoss: 0,
+		OK: ok,
 	}
 }
 
 func TestRenderMatrixText_RailGridAndCrossRail(t *testing.T) {
 	result := &MatrixResult{
 		PingResults: []PingResult{
-			// Rail rail-0: 2 nodes, all green
-			successResult("worker-1", "worker-2", "rail-0", "10.0.0.1", "10.0.0.2", 0, 0.5),
-			successResult("worker-2", "worker-1", "rail-0", "10.0.0.2", "10.0.0.1", 0, 0.6),
-			// Rail rail-1: one direction fails
-			successResult("worker-1", "worker-2", "rail-1", "10.0.1.1", "10.0.1.2", 0, 0.7),
-			failResult("worker-2", "worker-1", "rail-1", 100),
+			// Rail rail-0: 2 nodes, both rping pass
+			rpingResult("worker-1", "worker-2", "rail-0", true),
+			rpingResult("worker-2", "worker-1", "rail-0", true),
+			ibBwResult("worker-1", "worker-2", "rail-0", true, 194.4),
+			ibBwResult("worker-2", "worker-1", "rail-0", true, 193.8),
+			// Rail rail-1: one rping direction fails
+			rpingResult("worker-1", "worker-2", "rail-1", true),
+			rpingResult("worker-2", "worker-1", "rail-1", false),
 			// Cross-rail canaries
-			crossResult("worker-1", "worker-2", "rail-0", "rail-1", true),
-			crossResult("worker-2", "worker-1", "rail-0", "rail-1", false),
+			crossRpingResult("worker-1", "worker-2", "rail-0", "rail-1", true),
+			crossRpingResult("worker-2", "worker-1", "rail-0", "rail-1", false),
 		},
 	}
 
@@ -115,21 +133,25 @@ func TestRenderMatrixText_RailGridAndCrossRail(t *testing.T) {
 
 	joined := strings.Join(out.lines, "\n")
 
-	// Sanity: both rails rendered.
-	assert.Contains(t, joined, "Rail rail-0:")
-	assert.Contains(t, joined, "Rail rail-1:")
+	// Sanity: both rails rendered. The kind family appears in the
+	// header so we look for the "Rail <name> — RDMA ping" prefix.
+	assert.Contains(t, joined, "Rail rail-0 — RDMA ping (rping):")
+	assert.Contains(t, joined, "Rail rail-1 — RDMA ping (rping):")
+	assert.Contains(t, joined, "Rail rail-0 — RDMA bandwidth (ib_write_bw):")
 	// Cross-rail section rendered.
-	assert.Contains(t, joined, "Cross-rail canary:")
+	assert.Contains(t, joined, "Cross-rail canary — RDMA ping (rping):")
 	// Header row of the grid.
 	assert.Contains(t, joined, "src \\ dst")
 	// Axis labels must be node names, NOT pod names.
 	assert.Contains(t, joined, "worker-1")
 	assert.Contains(t, joined, "worker-2")
 	assert.NotContains(t, joined, "worker-1-pod")
-	// Sample cells: green ✓ in non-TTY mode is just plain text.
-	assert.Contains(t, joined, "✓ 0% 0.5ms")
-	// Failure cell shows ✗ with packet loss.
-	assert.Contains(t, joined, "✗ 100%")
+	// Sample cells: rping ✓ in non-TTY mode is just plain text.
+	assert.Contains(t, joined, "✓")
+	// ib_write_bw cell shows formatted Gbps.
+	assert.Contains(t, joined, "194.4 Gbps")
+	// Failure cell shows ✗ for rping.
+	assert.Contains(t, joined, "✗")
 	// Self-pairs are dashes.
 	selfDash := 0
 	for _, l := range out.lines {
@@ -169,27 +191,46 @@ func TestShortPodName(t *testing.T) {
 
 func TestCellFor(t *testing.T) {
 	t.Run("self pair is dash", func(t *testing.T) {
-		assert.Equal(t, "—", cellFor("pod-a", "pod-a", nil, false))
+		assert.Equal(t, "—", cellFor("pod-a", "pod-a", nil, familyRPing, false))
 	})
 	t.Run("missing result is bullet", func(t *testing.T) {
-		assert.Equal(t, "·", cellFor("pod-a", "pod-b", nil, false))
+		assert.Equal(t, "·", cellFor("pod-a", "pod-b", nil, familyRPing, false))
+	})
+	t.Run("rping OK is bare checkmark", func(t *testing.T) {
+		r := PingResult{OK: true}
+		assert.Equal(t, "✓", cellFor("a", "b", &r, familyRPing, false))
+	})
+	t.Run("rping fail is bare X", func(t *testing.T) {
+		r := PingResult{OK: false}
+		assert.Equal(t, "✗", cellFor("a", "b", &r, familyRPing, false))
 	})
-	t.Run("OK result formats with loss + rtt", func(t *testing.T) {
-		r := PingResult{OK: true, PacketLoss: 0, RTTAvgMs: 1.23}
-		assert.Equal(t, "✓ 0% 1.2ms", cellFor("a", "b", &r, false))
+	t.Run("ib_write_bw OK shows Gbps", func(t *testing.T) {
+		r := PingResult{OK: true, BandwidthGbps: 194.39}
+		assert.Equal(t, "✓ 194.4 Gbps", cellFor("a", "b", &r, familyIbBw, false))
 	})
-	t.Run("partial loss is failure", func(t *testing.T) {
-		r := PingResult{OK: false, PacketLoss: 33}
-		assert.Equal(t, "✗ 33%", cellFor("a", "b", &r, false))
+	t.Run("ib_write_bw fail with no bandwidth is ERR", func(t *testing.T) {
+		r := PingResult{OK: false}
+		assert.Equal(t, "✗ ERR", cellFor("a", "b", &r, familyIbBw, false))
 	})
-	t.Run("exec error has no loss reading", func(t *testing.T) {
-		r := PingResult{OK: false, PacketLoss: -1}
-		assert.Equal(t, "✗ ERR", cellFor("a", "b", &r, false))
+	t.Run("ib_write_bw OK but zero bandwidth is ERR", func(t *testing.T) {
+		// Defensive: OK=true with zero bandwidth shouldn't happen
+		// in practice, but if it does the cell renders as ERR
+		// since a "0 Gbps" link is broken even if perftest exited
+		// cleanly.
+		r := PingResult{OK: true, BandwidthGbps: 0}
+		assert.Equal(t, "✗ ERR", cellFor("a", "b", &r, familyIbBw, false))
 	})
-	t.Run("TTY mode wraps in ANSI green", func(t *testing.T) {
-		r := PingResult{OK: true, PacketLoss: 0}
-		got := cellFor("a", "b", &r, true)
+	t.Run("TTY mode wraps in ANSI green for OK", func(t *testing.T) {
+		r := PingResult{OK: true}
+		got := cellFor("a", "b", &r, familyRPing, true)
 		assert.Contains(t, got, "\033[32m")
 		assert.Contains(t, got, "\033[0m")
 	})
 }
+
+func TestKindFamilyOf(t *testing.T) {
+	assert.Equal(t, familyRPing, kindFamilyOf(RDMAPingSameRail))
+	assert.Equal(t, familyRPing, kindFamilyOf(RDMAPingCrossRail))
+	assert.Equal(t, familyIbBw, kindFamilyOf(RDMABwSameRail))
+	assert.Equal(t, familyIbBw, kindFamilyOf(RDMABwCrossRail))
+}
diff --git a/pkg/networkoperatorplugin/crstate/registry.go b/pkg/networkoperatorplugin/crstate/registry.go
index 027c5aa..7c157e0 100644
--- a/pkg/networkoperatorplugin/crstate/registry.go
+++ b/pkg/networkoperatorplugin/crstate/registry.go
@@ -66,3 +66,45 @@ func (r *Registry) Validate(ctx context.Context, c client.Client, obj *unstructu
 	}
 	return DefaultExistenceValidator(ctx, c, obj)
 }
+
+// NeedsObservationGate reports whether a CR's authoritative status
+// lives on the object itself (vs on a companion CR). The deploy
+// state machine uses this to decide whether the post-apply
+// resourceVersion bump is a meaningful "controller has reacted"
+// signal:
+//
+//   - true  — validator reads `obj.status.*` directly (NCP/NNP/the
+//     three Mellanox Network Kinds, SpectrumXRailPoolConfig
+//     v1alpha2). Until the controller writes status back the RV
+//     stays at the apply-time value, and the validator would return
+//     stale data from the previous reconcile. Gate the verdict
+//     until live RV moves past apply-time RV.
+//
+//   - false — validator reads companion CRs whose lifecycle is
+//     independent of the apply (SriovNetworkNodePolicy →
+//     SriovNetworkNodeState per node; NicInterfaceNameTemplate /
+//     NicConfigurationTemplate → NicDevice per device). The
+//     companion's RV evolves on its own schedule and the
+//     SriovNetworkNodePolicy itself never gets a status write, so
+//     gating on its RV would block forever. Validators in this
+//     bucket get to give an immediate verdict — staleness is their
+//     own problem to detect (and the SR-IOV validator does, by
+//     bucketing the "Succeeded but numVfs not at target" case as
+//     in-progress per the SR-IOV soft-progress rule).
+//
+// IPPool / CIDRPool / SriovNetwork / SriovIBNetwork / OVSNetwork
+// fall through to the default existence-only validator and also
+// don't need the gate — there's no status to read at all.
+func NeedsObservationGate(gvk schema.GroupVersionKind) bool {
+	if gvk.Group == "mellanox.com" && gvk.Version == "v1alpha1" {
+		switch gvk.Kind {
+		case "NicClusterPolicy", "NicNodePolicy",
+			"HostDeviceNetwork", "IPoIBNetwork", "MacvlanNetwork":
+			return true
+		}
+	}
+	if gvk.Group == spcxGroup && gvk.Version == spcxVersionAlpha2 && gvk.Kind == spcxKindRailPoolConfig {
+		return true
+	}
+	return false
+}
diff --git a/pkg/networkoperatorplugin/crstate/registry_test.go b/pkg/networkoperatorplugin/crstate/registry_test.go
index 160931f..b8f4514 100644
--- a/pkg/networkoperatorplugin/crstate/registry_test.go
+++ b/pkg/networkoperatorplugin/crstate/registry_test.go
@@ -190,6 +190,42 @@ func TestStatusStringValidator_RegisteredKinds(t *testing.T) {
 	}
 }
 
+func TestNeedsObservationGate(t *testing.T) {
+	cases := []struct {
+		gvk  schema.GroupVersionKind
+		want bool
+		name string
+	}{
+		// Kinds whose validators read .status on the CR itself —
+		// gate is meaningful, controller's RV bump is the signal.
+		{schema.GroupVersionKind{Group: "mellanox.com", Version: "v1alpha1", Kind: "NicClusterPolicy"}, true, "NicClusterPolicy"},
+		{schema.GroupVersionKind{Group: "mellanox.com", Version: "v1alpha1", Kind: "NicNodePolicy"}, true, "NicNodePolicy"},
+		{schema.GroupVersionKind{Group: "mellanox.com", Version: "v1alpha1", Kind: "HostDeviceNetwork"}, true, "HostDeviceNetwork"},
+		{schema.GroupVersionKind{Group: "mellanox.com", Version: "v1alpha1", Kind: "IPoIBNetwork"}, true, "IPoIBNetwork"},
+		{schema.GroupVersionKind{Group: "mellanox.com", Version: "v1alpha1", Kind: "MacvlanNetwork"}, true, "MacvlanNetwork"},
+		{schema.GroupVersionKind{Group: "spectrumx.nvidia.com", Version: "v1alpha2", Kind: "SpectrumXRailPoolConfig"}, true, "SpectrumXRailPoolConfig"},
+
+		// Kinds whose validators read companion CRs — gate would
+		// block forever waiting for an RV bump that never lands.
+		{schema.GroupVersionKind{Group: "sriovnetwork.openshift.io", Version: "v1", Kind: "SriovNetworkNodePolicy"}, false, "SriovNetworkNodePolicy (companion = SriovNetworkNodeState)"},
+		{schema.GroupVersionKind{Group: "configuration.net.nvidia.com", Version: "v1alpha1", Kind: "NicInterfaceNameTemplate"}, false, "NicInterfaceNameTemplate (companion = NicDevice)"},
+		{schema.GroupVersionKind{Group: "configuration.net.nvidia.com", Version: "v1alpha1", Kind: "NicConfigurationTemplate"}, false, "NicConfigurationTemplate (companion = NicDevice)"},
+
+		// Existence-only Kinds — no status at all, gate irrelevant.
+		{schema.GroupVersionKind{Group: "nv-ipam.nvidia.com", Version: "v1alpha1", Kind: "IPPool"}, false, "IPPool"},
+		{schema.GroupVersionKind{Group: "sriovnetwork.openshift.io", Version: "v1", Kind: "SriovNetwork"}, false, "SriovNetwork"},
+
+		// Wrong group / version — never gate.
+		{schema.GroupVersionKind{Group: "mellanox.com", Version: "v1beta1", Kind: "NicClusterPolicy"}, false, "wrong version"},
+		{schema.GroupVersionKind{Group: "other.example.com", Version: "v1", Kind: "NicClusterPolicy"}, false, "wrong group"},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.want, NeedsObservationGate(tc.gvk))
+		})
+	}
+}
+
 // node returns a labelled *corev1.Node for fake-client seeding.
 func node(name string, labels map[string]string) *corev1.Node {
 	n := &corev1.Node{}
diff --git a/pkg/networkoperatorplugin/deploy.go b/pkg/networkoperatorplugin/deploy.go
index 37a181a..3b6374e 100644
--- a/pkg/networkoperatorplugin/deploy.go
+++ b/pkg/networkoperatorplugin/deploy.go
@@ -191,7 +191,7 @@ func ApplyManifestsFromDir(ctx context.Context, kubeClient client.Client, manife
 			}
 
 			am := appliedManifest{obj: obj}
-			if obj.GetGeneration() > preApplyGen {
+			if obj.GetGeneration() > preApplyGen && crstate.NeedsObservationGate(obj.GroupVersionKind()) {
 				am.awaitObservationAfterRV = obj.GetResourceVersion()
 			}
 			appliedOthers = append(appliedOthers, am)
@@ -331,11 +331,12 @@ func applyAndWait(ctx context.Context, c client.Client, registry *crstate.Regist
 
 	// applyUnstructured does an SSA Patch that returns the
 	// server-decided object on `obj`. Its current resourceVersion
-	// is "the version right after our apply". If the spec changed,
-	// any subsequent live RV != this value means the controller
-	// has written status since.
+	// is "the version right after our apply". If the spec changed
+	// AND this Kind's validator reads .status from the CR itself
+	// (vs from companion CRs), any subsequent live RV != this
+	// value means the controller has written status since.
 	awaitObservationAfterRV := ""
-	if obj.GetGeneration() > preApplyGen {
+	if obj.GetGeneration() > preApplyGen && crstate.NeedsObservationGate(obj.GroupVersionKind()) {
 		awaitObservationAfterRV = obj.GetResourceVersion()
 		log.Log.V(1).Info("Spec changed; gating poll on controller observation",
 			"kind", obj.GetKind(), "name", obj.GetName(),
diff --git a/pkg/networkoperatorplugin/discovery.go b/pkg/networkoperatorplugin/discovery.go
index 7ce73c6..04591a4 100644
--- a/pkg/networkoperatorplugin/discovery.go
+++ b/pkg/networkoperatorplugin/discovery.go
@@ -22,7 +22,6 @@ import (
 	"fmt"
 	"slices"
 	"sort"
-	"strconv"
 	"strings"
 	"time"
 
@@ -32,6 +31,7 @@ import (
 	"github.com/nvidia/k8s-launch-kit/pkg/config"
 	"github.com/nvidia/k8s-launch-kit/pkg/kubeclient"
 	"github.com/nvidia/k8s-launch-kit/pkg/networkoperatorplugin/internal/pciids"
+	"github.com/nvidia/k8s-launch-kit/pkg/presetmatch"
 	"github.com/nvidia/k8s-launch-kit/pkg/presets"
 	"github.com/nvidia/k8s-launch-kit/pkg/ui"
 	corev1 "k8s.io/api/core/v1"
@@ -295,55 +295,44 @@ func (p *NetworkOperatorPlugin) DiscoverClusterConfig(ctx context.Context, c cli
 			discoverGroupFabric(ctx, p.RESTConfig,
 				defaultConfig.NetworkOperator.Namespace, group, dsPods)
 
-			// Try to enrich with a predefined topology preset for this (machine,
-			// GPU) pair. Presets provide authoritative traffic classification,
-			// rail assignments, and NUMA/GPU topology metadata for known
-			// hardware configurations. Lookup is exact-match on (machineType,
+			// Try to enrich with a predefined topology preset for this
+			// (machine, GPU) pair. presetmatch.MatchGroup runs the
+			// shared lookup + deviation comparison (also used by
+			// `l8k validate`); the discovery path then additionally
+			// applies the preset onto the group so rail/NUMA topology
+			// fields populate. Lookup is exact-match on (machineType,
 			// gpuType) — both must be known for a preset to apply.
-			if group.MachineType != "" && group.GPUType != "" {
-				log.Log.V(1).Info("Looking up preset by (machineType, gpuType)",
-					"group", group.Identifier,
-					"machineType", group.MachineType,
-					"gpuType", group.GPUType)
-				preset, presetErr := presets.LoadPreset(group.MachineType, group.GPUType)
-				if presetErr != nil {
-					log.Log.Error(presetErr, "failed to load preset",
-						"machineType", group.MachineType, "gpuType", group.GPUType)
-					uiOutput.Warning("Failed to load preset for %s/%s: %v",
-						group.MachineType, group.GPUType, presetErr)
-				} else if preset == nil {
-					log.Log.V(1).Info("No preset matched (machineType, gpuType)",
-						"group", group.Identifier,
-						"machineType", group.MachineType,
-						"gpuType", group.GPUType)
-				} else {
-					// Always apply the matched preset on a best-effort basis.
-					// Any discrepancies (PF count, PCI address drift,
-					// device-ID drift) are recorded as soft deviations and
-					// re-warned about on every subsequent config load.
-					deviations := presets.ValidatePreset(preset, group.PFs)
+			matchResult := presetmatch.MatchGroup(*group)
+			log.Log.V(1).Info("Preset match",
+				"group", group.Identifier,
+				"machineType", group.MachineType,
+				"gpuType", group.GPUType,
+				"status", string(matchResult.Status),
+				"presetName", matchResult.PresetName,
+				"deviationCount", len(matchResult.Deviations))
+			switch matchResult.Status {
+			case presetmatch.StatusMatch, presetmatch.StatusDeviation:
+				// LoadPreset was successful — load it again to get
+				// the Topology so ApplyPreset can enrich the group.
+				// (MatchGroup intentionally doesn't mutate.)
+				if preset, err := presets.LoadPreset(group.MachineType, group.GPUType); err == nil && preset != nil {
 					presets.ApplyPreset(preset, group)
-					log.Log.V(1).Info("Preset matched and applied",
-						"group", group.Identifier,
-						"machineType", group.MachineType,
-						"gpuType", group.GPUType,
-						"presetPFCount", len(preset.PFs),
-						"discoveredPFCount", len(group.PFs),
-						"deviationCount", len(deviations))
-					if len(deviations) > 0 {
-						group.PresetDeviation = deviations
-						log.Log.Info("Preset applied with deviations from matched preset",
-							"group", group.Identifier,
-							"machineType", group.MachineType,
-							"gpuType", group.GPUType,
-							"deviationCount", len(deviations))
-						uiOutput.Warning(
-							"Preset for %s/%s applied with %d deviation(s) from the matched preset. The deployment is not certified — see 'presetDeviation' in cluster-config.yaml.",
-							group.MachineType, group.GPUType, len(deviations))
-					} else {
-						uiOutput.Info("Applied preset configuration for %s", group.MachineType)
-					}
 				}
+				if matchResult.Status == presetmatch.StatusDeviation {
+					group.PresetDeviation = matchResult.Deviations
+					uiOutput.Warning(
+						"Preset for %s/%s applied with %d deviation(s) from the matched preset. The deployment is not certified — see 'presetDeviation' in cluster-config.yaml.",
+						group.MachineType, group.GPUType, len(matchResult.Deviations))
+				} else {
+					uiOutput.Info("Applied preset configuration for %s", group.MachineType)
+				}
+			case presetmatch.StatusNotFound:
+				// No catalog entry — discovery continues without
+				// preset enrichment. Logged at V(1) only; not a
+				// user-actionable warning.
+			case presetmatch.StatusSkipped:
+				// machineType / gpuType wasn't discovered. Discovery
+				// already logs this via the hardware-type probes.
 			}
 
 			modules, err := discoverThirdPartyRDMAModules(ctx, p.RESTConfig,
@@ -1360,16 +1349,21 @@ func truncateForLog(s string, maxLen int) string {
 
 // discoverGroupFabric probes the InfiniBand sysfs entries on a representative
 // daemon pod for every east-west PF in `group` that has an RdmaDevice and,
-// when the per-port verdicts unanimously agree on a confirmed value, sets
+// when the per-port verdicts unanimously agree on a value, sets
 // `group.LinkType`. Otherwise the field is left empty — discovery couldn't
-// prove the cluster is using a specific fabric, and downstream code treats
-// absence as "unknown".
+// determine the fabric type, and downstream code treats absence as
+// "unknown".
 //
-// "Confirmed" means the port is ACTIVE and (for InfiniBand) a subnet
-// manager is present (sm_lid != 0). Anything else — port down, IB without
-// SM, malformed sysfs output — yields no contribution to the group's
-// verdict. Reading link_layer alone would be unreliable: that file just
-// reflects firmware config and may be a default the cluster doesn't use.
+// The verdict is the port's configured `link_layer` (sysfs file) — what
+// the firmware says the port is wired for, regardless of whether the
+// netdev is currently up. We deliberately ignore `state` (ACTIVE vs
+// DOWN) and `sm_lid` (IB subnet manager presence) here: requiring an
+// active link broke discovery on freshly-provisioned clusters where
+// the switch wasn't yet plugged in, and the configured link_layer is
+// what every downstream template needs anyway. An operator who
+// reflashes a card to a different fabric needs to re-run discover, but
+// that's the only failure mode we accept in exchange for the
+// "discover before the cluster is wired up" win.
 //
 // Multi-node groups whose RdmaDevice is empty (per the existing
 // per-node-vs-group safety rule) skip the probe — there's no ibdev name
@@ -1391,7 +1385,7 @@ func discoverGroupFabric(ctx context.Context, restConfig *rest.Config,
 		containerName = targetPod.Spec.Containers[0].Name
 	}
 
-	verdicts := map[string]int{} // confirmed linkType -> count
+	verdicts := map[string]int{} // linkType -> count of contributing PFs
 	probed := 0
 	for _, pf := range group.PFs {
 		if pf.Traffic != "east-west" || pf.RdmaDevice == "" {
@@ -1429,7 +1423,7 @@ func discoverGroupFabric(ctx context.Context, restConfig *rest.Config,
 				"group", group.Identifier,
 				"linkType", k,
 				"probedPFs", probed,
-				"confirmedPFs", verdicts[k])
+				"contributingPFs", verdicts[k])
 		}
 	case len(verdicts) > 1:
 		log.Log.V(1).Info("Group fabric ambiguous (probes disagree); leaving linkType unset",
@@ -1437,126 +1431,65 @@ func discoverGroupFabric(ctx context.Context, restConfig *rest.Config,
 			"probedPFs", probed,
 			"verdicts", verdicts)
 	default:
-		log.Log.V(1).Info("Group fabric unconfirmed (no port produced a confirmed verdict); leaving linkType unset",
+		log.Log.V(1).Info("Group fabric unresolved (no port reported a recognised link_layer); leaving linkType unset",
 			"group", group.Identifier,
 			"probedPFs", probed)
 	}
 }
 
 // discoverPortFabric reads
-// /sys/class/infiniband/<rdmaDevice>/ports/<port>/{state,phys_state,link_layer,sm_lid}
-// inside the daemon pod via a single shell exec and returns the confirmed
-// fabric for that port (empty when the port could not produce a confirmed
-// verdict). rawSummary is a short human-readable joined version of the
-// four sysfs values for debug logs.
+// /sys/class/infiniband/<rdmaDevice>/ports/<port>/link_layer inside the
+// daemon pod and returns the configured fabric for that port —
+// "Ethernet", "InfiniBand", or "" when the file is empty / unreadable /
+// unrecognised. The port's runtime state (ACTIVE / DOWN) is
+// intentionally NOT consulted: discovery has to work on freshly
+// provisioned clusters where the switch isn't yet plugged in.
 //
 // Tries `/sys/class/infiniband/...` first (works when the daemon pod
 // shares host pid+net namespace and exposes the host sysfs at /sys),
 // then falls back to `/host/sys/class/infiniband/...` for daemons that
 // mount the host filesystem under /host (matches consts.HostPath =
-// "/host" used by the rest of nic-configuration-operator). Stderr is
-// captured rather than swallowed so a failed read surfaces in the
-// debug log instead of producing a silent empty verdict.
+// "/host" used by the rest of nic-configuration-operator). The first
+// path that yields a recognised link_layer wins.
 func discoverPortFabric(ctx context.Context, restConfig *rest.Config,
 	namespace, podName, containerName, rdmaDevice string, port int) (string, string, error) {
+	var lastErr error
 	for _, base := range []string{
-		fmt.Sprintf("/sys/class/infiniband/%s/ports/%d", rdmaDevice, port),
-		fmt.Sprintf("/host/sys/class/infiniband/%s/ports/%d", rdmaDevice, port),
+		fmt.Sprintf("/sys/class/infiniband/%s/ports/%d/link_layer", rdmaDevice, port),
+		fmt.Sprintf("/host/sys/class/infiniband/%s/ports/%d/link_layer", rdmaDevice, port),
 	} {
-		cmd := fmt.Sprintf(
-			"echo state=$(cat %s/state); "+
-				"echo phys_state=$(cat %s/phys_state); "+
-				"echo link_layer=$(cat %s/link_layer); "+
-				"echo sm_lid=$(cat %s/sm_lid)",
-			base, base, base, base)
+		cmd := fmt.Sprintf("cat %s", base)
 		output, err := execInPod(ctx, restConfig, namespace, podName, containerName,
 			[]string{"/bin/sh", "-c", cmd})
-		// Even on exec error we attempt to parse — `cat` returns
-		// non-zero for missing files but the SPDY executor still
-		// captures the stdout from earlier successful echoes.
+		if err != nil {
+			lastErr = err
+			log.Log.V(1).Info("Fabric port probe: read failed at this base",
+				"rdmaDevice", rdmaDevice, "port", port, "base", base,
+				"execErr", err.Error())
+			continue
+		}
 		linkType, raw := parsePortFabricVerdict(output)
 		if linkType != "" {
 			return linkType, raw, nil
 		}
-		// First-path miss: log the raw read so an operator can see
-		// what came back, then try the next base path. We only
-		// surface the final error (if any) to the caller.
-		log.Log.V(1).Info("Fabric port probe: empty/unconfirmed at this base",
+		log.Log.V(1).Info("Fabric port probe: link_layer at this base not recognised",
 			"rdmaDevice", rdmaDevice, "port", port, "base", base,
-			"raw", raw, "execErr", errString(err))
-		if err == nil && raw != "" {
-			// Got a clean read that simply didn't meet the
-			// confirmation criteria (port DOWN, IB without
-			// SM, etc.). No point trying the other base —
-			// the port really isn't in a usable state.
-			return "", raw, nil
-		}
+			"raw", raw)
 	}
-	return "", "", fmt.Errorf("no readable sysfs at either /sys/class/infiniband/%s or /host/sys/class/infiniband/%s",
-		rdmaDevice, rdmaDevice)
-}
-
-// errString returns err.Error() or "" when err is nil — used in the
-// V(1) probe log so we don't get the literal "<nil>" sentinel.
-func errString(err error) string {
-	if err == nil {
-		return ""
+	if lastErr != nil {
+		return "", "", lastErr
 	}
-	return err.Error()
+	return "", "", nil
 }
 
-// parsePortFabricVerdict converts the four-line "key=value" output of the
-// sysfs probe into a confirmed fabric verdict (or empty when no
-// confirmation is possible).
-//
-// Confirmation rule:
-//   - Active + InfiniBand + sm_lid != 0  → "InfiniBand".
-//   - Active + Ethernet                  → "Ethernet".
-//   - Anything else                      → "" (no confirmation; caller
-//     leaves group.LinkType unset).
-//
-// Active means the state file matches "ACTIVE" (case-insensitive); the
-// kernel formats it as "4: ACTIVE", "1: DOWN", etc.
+// parsePortFabricVerdict normalises a sysfs `link_layer` read into the
+// l8k vocabulary ("Ethernet" / "InfiniBand"). The output may be the
+// raw file content ("Ethernet\n"), a `cat`'s output with possible
+// trailing newline, or empty when the file didn't exist. raw is the
+// trimmed input echoed back for debug-log breadcrumbs.
 func parsePortFabricVerdict(output string) (linkType, raw string) {
-	fields := map[string]string{}
-	for _, line := range strings.Split(output, "\n") {
-		eq := strings.IndexByte(line, '=')
-		if eq < 0 {
-			continue
-		}
-		fields[strings.TrimSpace(line[:eq])] = strings.TrimSpace(line[eq+1:])
-	}
-	state := fields["state"]
-	linkLayer := normalizeLinkLayer(fields["link_layer"])
-	smLid := fields["sm_lid"]
-
-	raw = fmt.Sprintf("state=%q phys_state=%q link_layer=%q sm_lid=%q",
-		state, fields["phys_state"], fields["link_layer"], smLid)
-
-	active := strings.Contains(strings.ToUpper(state), "ACTIVE")
-	hasSM := smLidIsNonZero(smLid)
-
-	switch {
-	case active && linkLayer == "InfiniBand" && hasSM:
-		return "InfiniBand", raw
-	case active && linkLayer == "Ethernet":
-		return "Ethernet", raw
-	default:
-		return "", raw
-	}
-}
-
-// smLidIsNonZero parses a sysfs `sm_lid` value (e.g. "0", "0x0", "0x0000",
-// "0x0001") as an unsigned integer and returns true when the value is
-// strictly greater than zero. Kernel versions disagree on the format —
-// some emit decimal, some emit hex — so we accept both via auto-base
-// (base=0 in strconv.ParseUint).
-func smLidIsNonZero(s string) bool {
-	v, err := strconv.ParseUint(strings.TrimSpace(s), 0, 32)
-	if err != nil {
-		return false
-	}
-	return v != 0
+	raw = strings.TrimSpace(output)
+	return normalizeLinkLayer(raw), raw
 }
 
 // normalizeLinkLayer canonicalises sysfs link_layer strings to the YAML
diff --git a/pkg/networkoperatorplugin/discovery_test.go b/pkg/networkoperatorplugin/discovery_test.go
index 305c678..449c162 100644
--- a/pkg/networkoperatorplugin/discovery_test.go
+++ b/pkg/networkoperatorplugin/discovery_test.go
@@ -432,46 +432,43 @@ func TestKnownStorageModules_MatchesMofedmodules(t *testing.T) {
 
 // --- parsePortFabricVerdict tests ---
 
-func TestParsePortFabricVerdict_ConfirmedInfiniBand(t *testing.T) {
-	out := "state=4: ACTIVE\nphys_state=5: LinkUp\nlink_layer=InfiniBand\nsm_lid=0x0001\n"
-	linkType, raw := parsePortFabricVerdict(out)
+func TestParsePortFabricVerdict_InfiniBand(t *testing.T) {
+	linkType, raw := parsePortFabricVerdict("InfiniBand\n")
 	assert.Equal(t, "InfiniBand", linkType)
-	assert.Contains(t, raw, "sm_lid=\"0x0001\"")
+	assert.Equal(t, "InfiniBand", raw)
 }
 
-func TestParsePortFabricVerdict_ConfirmedEthernet(t *testing.T) {
-	// Ethernet ports don't need an SM; ACTIVE + Ethernet alone is enough.
-	out := "state=4: ACTIVE\nphys_state=5: LinkUp\nlink_layer=Ethernet\nsm_lid=0x0000\n"
-	linkType, _ := parsePortFabricVerdict(out)
+func TestParsePortFabricVerdict_Ethernet(t *testing.T) {
+	linkType, raw := parsePortFabricVerdict("Ethernet\n")
 	assert.Equal(t, "Ethernet", linkType)
+	assert.Equal(t, "Ethernet", raw)
 }
 
-func TestParsePortFabricVerdict_UnverifiedIB_NoSM_ReturnsEmpty(t *testing.T) {
-	// Active IB port without a subnet manager — we can't confirm the
-	// cluster is using IB. Verdict is empty (caller leaves group.LinkType
-	// unset).
-	out := "state=4: ACTIVE\nphys_state=5: LinkUp\nlink_layer=InfiniBand\nsm_lid=0x0000\n"
-	linkType, _ := parsePortFabricVerdict(out)
-	assert.Equal(t, "", linkType)
+func TestParsePortFabricVerdict_DownPortStillResolvesByLinkLayer(t *testing.T) {
+	// Old behaviour required ACTIVE state; the new probe reads
+	// only the configured link_layer file, which gives a verdict
+	// regardless of runtime state. This is what unblocks
+	// discovery on freshly provisioned clusters where the switch
+	// isn't plugged in yet.
+	linkType, _ := parsePortFabricVerdict("Ethernet\n")
+	assert.Equal(t, "Ethernet", linkType)
 }
 
-func TestParsePortFabricVerdict_DownPort_ReturnsEmpty(t *testing.T) {
-	// Port not ACTIVE — no confirmation possible.
-	out := "state=1: DOWN\nphys_state=3: Disabled\nlink_layer=Ethernet\nsm_lid=0x0000\n"
-	linkType, _ := parsePortFabricVerdict(out)
+func TestParsePortFabricVerdict_EmptyOutput(t *testing.T) {
+	linkType, raw := parsePortFabricVerdict("")
 	assert.Equal(t, "", linkType)
+	assert.Equal(t, "", raw)
 }
 
-func TestParsePortFabricVerdict_EmptyOutput(t *testing.T) {
-	linkType, _ := parsePortFabricVerdict("")
+func TestParsePortFabricVerdict_UnrecognisedValue(t *testing.T) {
+	linkType, raw := parsePortFabricVerdict("Foo\n")
 	assert.Equal(t, "", linkType)
+	assert.Equal(t, "Foo", raw)
 }
 
-func TestParsePortFabricVerdict_PartialOutput(t *testing.T) {
-	// link_layer line missing — no verdict.
-	out := "state=4: ACTIVE\nphys_state=5: LinkUp\nsm_lid=0x0001\n"
-	linkType, _ := parsePortFabricVerdict(out)
-	assert.Equal(t, "", linkType)
+func TestParsePortFabricVerdict_TrimsWhitespace(t *testing.T) {
+	linkType, _ := parsePortFabricVerdict("  Ethernet  \n")
+	assert.Equal(t, "Ethernet", linkType)
 }
 
 func TestNormalizeLinkLayer(t *testing.T) {
diff --git a/pkg/networkoperatorplugin/validate.go b/pkg/networkoperatorplugin/validate.go
index d13f26f..ad2d604 100644
--- a/pkg/networkoperatorplugin/validate.go
+++ b/pkg/networkoperatorplugin/validate.go
@@ -62,7 +62,7 @@ type ValidationResult struct {
 	Details map[string]string
 
 	// LiveYAML is the cluster's view of the object, marshalled
-	// back to YAML for the verify-report's expandable "Live YAML"
+	// back to YAML for the validation report's expandable "Live YAML"
 	// dropdown. Empty when the object isn't present
 	// (StateNotDeployed) or when the post-validate fetch failed.
 	// Managed-fields / status are kept; we want the operator to
diff --git a/pkg/presetmatch/presetmatch.go b/pkg/presetmatch/presetmatch.go
new file mode 100644
index 0000000..3f204f5
--- /dev/null
+++ b/pkg/presetmatch/presetmatch.go
@@ -0,0 +1,176 @@
+// Copyright 2026 NVIDIA CORPORATION & AFFILIATES
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+// Package presetmatch is the single entry point for comparing a
+// discovered cluster group against the topology-preset catalog.
+//
+// Both `l8k discover` (at discovery time, when preset data is also
+// applied into the group via presets.ApplyPreset) and `l8k validate`
+// (at validation time, to confirm that the cluster's recorded hardware
+// still matches the certified preset) call MatchGroup / MatchAll
+// here. Keeping the lookup logic in one place means new lookup
+// behaviour (fuzzy matching, preset-version awareness, …) lands in
+// one file and both call sites pick it up.
+//
+// MatchGroup does not mutate the input group. Callers that want to
+// also enrich the group's transient topology fields call
+// presets.ApplyPreset themselves (typically only the discovery path
+// does that).
+package presetmatch
+
+import (
+	"fmt"
+
+	"github.com/nvidia/k8s-launch-kit/pkg/config"
+	"github.com/nvidia/k8s-launch-kit/pkg/presets"
+)
+
+// Status enumerates the outcomes of a per-group preset lookup.
+type Status string
+
+const (
+	// StatusMatch — preset found and every comparison field
+	// matches the discovered hardware exactly.
+	StatusMatch Status = "match"
+	// StatusDeviation — preset found but the hardware drifts on at
+	// least one field (PF count, PCI addresses, device IDs).
+	// Deviations are informational: the deployment can still run
+	// correctly against drifted hardware, just not against the
+	// certified preset.
+	StatusDeviation Status = "deviation"
+	// StatusNotFound — no preset matches the (machineType, gpuType)
+	// pair. Most common reason: the pair hasn't been catalogued
+	// yet, or discovery didn't populate machineType / gpuType
+	// (e.g. running on hardware the GPU operator labels don't
+	// cover).
+	StatusNotFound Status = "not-found"
+	// StatusSkipped — the lookup couldn't even be attempted (e.g.
+	// machineType or gpuType empty on the group). Distinct from
+	// StatusNotFound so callers can render "discovery is
+	// incomplete" rather than "no preset for this hardware."
+	StatusSkipped Status = "skipped"
+)
+
+// Result describes one group's preset-lookup outcome. Surfaced
+// verbatim by validate's text/JSON output and the HTML report. The
+// Deviations slice mirrors config.PresetDeviationEntry so callers
+// that already render the existing presetDeviation field keep
+// working without translation.
+type Result struct {
+	Group       string
+	MachineType string
+	GPUType     string
+	// Manufacturer is propagated from the matched preset's
+	// topology.yaml when a preset was found (StatusMatch /
+	// StatusDeviation). Empty otherwise. Surfaced in the user-
+	// facing "server type" label as the leading segment of
+	// <manufacturer>-<machineType>-<gpuType>.
+	Manufacturer string
+	Status       Status
+	// PresetName is the catalog directory name (what `l8k preset
+	// list` prints) when a preset was found. Empty otherwise.
+	PresetName string
+	// Reason carries a short human-readable explanation when
+	// Status is StatusNotFound or StatusSkipped, and a one-line
+	// summary when StatusDeviation ("3 deviation(s) — pfCount,
+	// pciAddress, deviceID"). Empty when StatusMatch.
+	Reason     string
+	Deviations []config.PresetDeviationEntry
+	// Preset is the loaded topology that produced the match. Used
+	// downstream to enrich "missing PCI" rows in the validation
+	// report with the expected deviceID / rail / netdev when the
+	// cluster doesn't have a device the certified topology
+	// expects. nil when Status is NotFound or Skipped.
+	Preset *presets.Topology
+}
+
+// MatchGroup runs the preset lookup + comparison for one group.
+// Does not mutate the group; callers that also want to enrich
+// rail/NUMA topology fields invoke presets.ApplyPreset separately.
+//
+// Lookup is exact-match on (machineType, gpuType). Empty fields
+// short-circuit to StatusSkipped — without those values the preset
+// catalog can't be queried, and we don't fall back to fuzzy matching
+// because picking the wrong preset would silently rewrite the
+// deployment to target the wrong hardware shape.
+func MatchGroup(group config.ClusterConfig) Result {
+	res := Result{
+		Group:       group.Identifier,
+		MachineType: group.MachineType,
+		GPUType:     group.GPUType,
+	}
+	if group.MachineType == "" || group.GPUType == "" {
+		res.Status = StatusSkipped
+		switch {
+		case group.MachineType == "" && group.GPUType == "":
+			res.Reason = "machineType and gpuType not discovered on group — `l8k discover` did not populate them"
+		case group.MachineType == "":
+			res.Reason = "machineType not discovered on group"
+		default:
+			res.Reason = "gpuType not discovered on group"
+		}
+		return res
+	}
+	preset, err := presets.LoadPreset(group.MachineType, group.GPUType)
+	if err != nil {
+		res.Status = StatusNotFound
+		res.Reason = fmt.Sprintf("preset lookup failed: %v", err)
+		return res
+	}
+	if preset == nil {
+		res.Status = StatusNotFound
+		res.Reason = fmt.Sprintf("no preset matches (%s, %s) in the local presets directory", group.MachineType, group.GPUType)
+		return res
+	}
+	deviations := presets.ValidatePreset(preset, group.PFs)
+	res.PresetName = preset.MachineType + "/" + preset.GPUType
+	res.Manufacturer = preset.Manufacturer
+	res.Deviations = deviations
+	res.Preset = preset
+	if len(deviations) == 0 {
+		res.Status = StatusMatch
+		return res
+	}
+	res.Status = StatusDeviation
+	res.Reason = fmt.Sprintf("%d deviation(s) from matched preset", len(deviations))
+	return res
+}
+
+// MatchAll runs MatchGroup over every entry in cfg.ClusterConfig and
+// returns the results in the same order. cfg is never mutated.
+func MatchAll(cfg *config.LaunchKubernetesConfig) []Result {
+	if cfg == nil {
+		return nil
+	}
+	out := make([]Result, 0, len(cfg.ClusterConfig))
+	for _, g := range cfg.ClusterConfig {
+		out = append(out, MatchGroup(g))
+	}
+	return out
+}
+
+// AnyMatched reports whether any of the results found AND fully
+// matched a preset. Used by the validate CLI to decide whether the
+// "preset" check is worth surfacing at all (vs. hidden when no
+// presets exist for the cluster's hardware).
+func AnyMatched(results []Result) bool {
+	for _, r := range results {
+		if r.Status == StatusMatch || r.Status == StatusDeviation {
+			return true
+		}
+	}
+	return false
+}
diff --git a/pkg/presetmatch/presetmatch_test.go b/pkg/presetmatch/presetmatch_test.go
new file mode 100644
index 0000000..0e31559
--- /dev/null
+++ b/pkg/presetmatch/presetmatch_test.go
@@ -0,0 +1,85 @@
+// Copyright 2026 NVIDIA CORPORATION & AFFILIATES
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package presetmatch
+
+import (
+	"testing"
+
+	"github.com/nvidia/k8s-launch-kit/pkg/config"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestMatchGroup_SkippedWhenMachineOrGPUMissing(t *testing.T) {
+	t.Run("both empty", func(t *testing.T) {
+		r := MatchGroup(config.ClusterConfig{Identifier: "g"})
+		assert.Equal(t, StatusSkipped, r.Status)
+		assert.Contains(t, r.Reason, "machineType and gpuType not discovered")
+	})
+	t.Run("machineType only", func(t *testing.T) {
+		r := MatchGroup(config.ClusterConfig{Identifier: "g", MachineType: "vendor-a-h200"})
+		assert.Equal(t, StatusSkipped, r.Status)
+		assert.Contains(t, r.Reason, "gpuType not discovered")
+	})
+	t.Run("gpuType only", func(t *testing.T) {
+		r := MatchGroup(config.ClusterConfig{Identifier: "g", GPUType: "h200"})
+		assert.Equal(t, StatusSkipped, r.Status)
+		assert.Contains(t, r.Reason, "machineType not discovered")
+	})
+}
+
+// The preset catalog can be absent in the test environment (it's
+// installed under /usr/local/share/l8k or similar). When that's the
+// case LoadPreset returns "not found" for any pair — exercising the
+// StatusNotFound path. The other Status outcomes need real preset
+// fixtures and are covered by the higher-level integration tests in
+// pkg/presets and pkg/networkoperatorplugin.
+func TestMatchGroup_NotFoundWhenNoCatalog(t *testing.T) {
+	r := MatchGroup(config.ClusterConfig{
+		Identifier:  "g",
+		MachineType: "fictional-machine-no-such-preset",
+		GPUType:     "h200",
+	})
+	// Either StatusNotFound (catalog exists but no match) or
+	// StatusNotFound (no catalog at all) — both produce the same
+	// status code, so a single assertion is enough.
+	assert.Equal(t, StatusNotFound, r.Status)
+	assert.NotEmpty(t, r.Reason)
+}
+
+func TestMatchAll(t *testing.T) {
+	cfg := &config.LaunchKubernetesConfig{
+		ClusterConfig: []config.ClusterConfig{
+			{Identifier: "a"},
+			{Identifier: "b", MachineType: "vendor-x", GPUType: "h200"},
+		},
+	}
+	results := MatchAll(cfg)
+	assert.Len(t, results, 2)
+	assert.Equal(t, "a", results[0].Group)
+	assert.Equal(t, StatusSkipped, results[0].Status)
+	assert.Equal(t, "b", results[1].Group)
+	// StatusNotFound when no preset catalog; never panics either way.
+	assert.NotEqual(t, StatusSkipped, results[1].Status)
+}
+
+func TestAnyMatched(t *testing.T) {
+	assert.False(t, AnyMatched(nil))
+	assert.False(t, AnyMatched([]Result{{Status: StatusSkipped}, {Status: StatusNotFound}}))
+	assert.True(t, AnyMatched([]Result{{Status: StatusMatch}}))
+	assert.True(t, AnyMatched([]Result{{Status: StatusDeviation}}))
+	assert.True(t, AnyMatched([]Result{{Status: StatusSkipped}, {Status: StatusMatch}}))
+}
diff --git a/profiles/host-device-rdma/40-example-daemonset.yaml b/profiles/host-device-rdma/40-example-daemonset.yaml
index 904a658..ad13a26 100644
--- a/profiles/host-device-rdma/40-example-daemonset.yaml
+++ b/profiles/host-device-rdma/40-example-daemonset.yaml
@@ -44,7 +44,7 @@ spec:
       {{- end }}
       containers:
       - name: test-container
-        image: mellanox/rping-test
+        image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
         command: ["/bin/bash", "-c", "sleep infinity"]
         securityContext:
           capabilities:
@@ -107,7 +107,7 @@ spec:
       {{- end }}
       containers:
       - name: test-container
-        image: mellanox/rping-test
+        image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
         command: ["/bin/bash", "-c", "sleep infinity"]
         securityContext:
           capabilities:
diff --git a/profiles/ipoib-rdma-shared/40-example-daemonset.yaml b/profiles/ipoib-rdma-shared/40-example-daemonset.yaml
index 29738ee..43448fe 100644
--- a/profiles/ipoib-rdma-shared/40-example-daemonset.yaml
+++ b/profiles/ipoib-rdma-shared/40-example-daemonset.yaml
@@ -44,7 +44,7 @@ spec:
       {{- end }}
       containers:
       - name: test-container
-        image: mellanox/rping-test
+        image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
         command: ["/bin/bash", "-c", "sleep infinity"]
         securityContext:
           capabilities:
@@ -108,7 +108,7 @@ spec:
       {{- end }}
       containers:
       - name: test-container
-        image: mellanox/rping-test
+        image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
         command: ["/bin/bash", "-c", "sleep infinity"]
         securityContext:
           capabilities:
diff --git a/profiles/macvlan-rdma-shared/40-example-daemonset.yaml b/profiles/macvlan-rdma-shared/40-example-daemonset.yaml
index 3e1aeea..b016505 100644
--- a/profiles/macvlan-rdma-shared/40-example-daemonset.yaml
+++ b/profiles/macvlan-rdma-shared/40-example-daemonset.yaml
@@ -44,7 +44,7 @@ spec:
       {{- end }}
       containers:
       - name: test-container
-        image: mellanox/rping-test
+        image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
         command: ["/bin/bash", "-c", "sleep infinity"]
         securityContext:
           capabilities:
@@ -108,7 +108,7 @@ spec:
       {{- end }}
       containers:
       - name: test-container
-        image: mellanox/rping-test
+        image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
         command: ["/bin/bash", "-c", "sleep infinity"]
         securityContext:
           capabilities:
diff --git a/profiles/spectrum-x-ra2.1/90-example-daemonset.yaml b/profiles/spectrum-x-ra2.1/90-example-daemonset.yaml
index 5cf601a..9afcaff 100644
--- a/profiles/spectrum-x-ra2.1/90-example-daemonset.yaml
+++ b/profiles/spectrum-x-ra2.1/90-example-daemonset.yaml
@@ -46,7 +46,8 @@ spec:
       {{- end }}
       containers:
       - name: spectrum-x-test
-        image: nvcr.io/nvidia/mellanox/rping-test:latest
+        image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
+        command: ["/bin/bash", "-c", "sleep infinity"]
         securityContext:
           capabilities:
             add: ["IPC_LOCK", "NET_RAW"]
diff --git a/profiles/spectrum-x/90-example-daemonset.yaml b/profiles/spectrum-x/90-example-daemonset.yaml
index 5cf601a..9afcaff 100644
--- a/profiles/spectrum-x/90-example-daemonset.yaml
+++ b/profiles/spectrum-x/90-example-daemonset.yaml
@@ -46,7 +46,8 @@ spec:
       {{- end }}
       containers:
       - name: spectrum-x-test
-        image: nvcr.io/nvidia/mellanox/rping-test:latest
+        image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
+        command: ["/bin/bash", "-c", "sleep infinity"]
         securityContext:
           capabilities:
             add: ["IPC_LOCK", "NET_RAW"]
diff --git a/profiles/sriov-ethernet-rdma/60-example-daemonset.yaml b/profiles/sriov-ethernet-rdma/60-example-daemonset.yaml
index abffc47..8a60e87 100644
--- a/profiles/sriov-ethernet-rdma/60-example-daemonset.yaml
+++ b/profiles/sriov-ethernet-rdma/60-example-daemonset.yaml
@@ -44,7 +44,7 @@ spec:
       {{- end }}
       containers:
       - name: test-container
-        image: mellanox/rping-test
+        image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
         command: ["/bin/bash", "-c", "sleep infinity"]
         securityContext:
           capabilities:
@@ -107,7 +107,7 @@ spec:
       {{- end }}
       containers:
       - name: test-container
-        image: mellanox/rping-test
+        image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
         command: ["/bin/bash", "-c", "sleep infinity"]
         securityContext:
           capabilities:
diff --git a/profiles/sriov-ib-rdma/60-example-daemonset.yaml b/profiles/sriov-ib-rdma/60-example-daemonset.yaml
index 7d56b5c..b14bfd7 100644
--- a/profiles/sriov-ib-rdma/60-example-daemonset.yaml
+++ b/profiles/sriov-ib-rdma/60-example-daemonset.yaml
@@ -44,7 +44,7 @@ spec:
       {{- end }}
       containers:
       - name: test-container
-        image: mellanox/rping-test
+        image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
         command: ["/bin/bash", "-c", "sleep infinity"]
         securityContext:
           capabilities:
@@ -107,7 +107,7 @@ spec:
       {{- end }}
       containers:
       - name: test-container
-        image: mellanox/rping-test
+        image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
         command: ["/bin/bash", "-c", "sleep infinity"]
         securityContext:
           capabilities:

Field	Expected	Got	Detail
PCI	Device ID	Netdev	RDMA	PSID	Part #
{{.Field}}	`{{or .Expected "—"}}`	`{{or .Got "—"}}`	{{.Detail}}
`{{.PciAddress}}`	`{{or .DeviceID "—"}}`	`{{or .NetworkInterface "—"}}`	`{{or .RdmaDevice "—"}}`	{{or .PSID "—"}}	{{or .PartNumber "—"}}
PCI	Device ID	Rail	Netdev	RDMA	PSID	Part #	NUMA	GPU
`0000:08:00.0`	`1021`	0
`0000:08:00.1`	`1021`	`1023`	1	`eth_r1`	`rdma_r1`	0	1
`0000:08:00.2`	`1023`	2	`eth_r2`	`rdma_r2`	—	MCX713106AE	0	2
Field	Expected	Got	Detail
PCI	Device ID	Rail	Netdev	RDMA	PSID	Part #	NUMA	GPU
deviceID	`1023`	`1021`	expected ConnectX-8 (1023), found ConnectX-7 (1021)
`0000:08:00.0`	`1023`	0	`eth_r0`	`rdma_r0`	—	MCX713106AE	0	0
`0000:08:00.1`	`1023`	1	`eth_r1`	`rdma_r1`	—	MCX713106AE	0	1
`0000:08:00.3`	`1023`	3	`eth_r3`	`rdma_r3`	—	MCX713106AE	0	3
node-a	—	✓ 0% 0.18ms	✓ 194.4 Gbps
node-b	✗ 100%	✓ 193.8 Gbps	—