- 4/5 passed · 1 failed
+ 7/8 passed · 1 failed
-
Rail rail-0
+
Rail rail-0 — RDMA ping (rping)
@@ -637,16 +775,16 @@ Connectivity matrix
| node-a |
— |
- ✓ 0% 0.12ms |
+ ✓ |
| node-b |
- ✓ 0% 0.15ms |
+ ✓ |
— |
-
Rail rail-1
+
Rail rail-0 — RDMA bandwidth (ib_write_bw)
@@ -659,16 +797,53 @@ Connectivity matrix
| node-a |
— |
- ✓ 0% 0.18ms |
+ ✓ 194.4 Gbps |
| node-b |
- ✗ 100% |
+ ✓ 193.8 Gbps |
— |
-
Cross-rail canary
+
Rail rail-1 — RDMA ping (rping)
+
+
+
+ | src \ dst |
+ node-a |
+ node-b |
+
+
+
+
+ | node-a |
+ — |
+ ✓ |
+
+
+ | node-b |
+ ✗ |
+ — |
+
+
+
+
Cross-rail canary — RDMA ping (rping)
+
+
+ | Source | Src rail | Destination | Dst rail | Result |
+
+
+
+ | node-a |
+ rail-0 |
+ node-b |
+ rail-1 |
+ ✓ |
+
+
+
+
Cross-rail canary — RDMA bandwidth (ib_write_bw)
| Source | Src rail | Destination | Dst rail | Result |
@@ -679,7 +854,7 @@ Connectivity matrix
rail-0 |
node-b |
rail-1 |
- ✓ 0% 0.20ms |
+ ✓ 187.6 Gbps |
diff --git a/pkg/networkoperatorplugin/connectivity/text_report.go b/pkg/networkoperatorplugin/connectivity/text_report.go
index c479047..84b9177 100644
--- a/pkg/networkoperatorplugin/connectivity/text_report.go
+++ b/pkg/networkoperatorplugin/connectivity/text_report.go
@@ -33,12 +33,11 @@ import (
// structured MatrixResult that the validate CLI marshals to stdout.
//
// Cells:
-// - "—" src equals dst (self-ping, not run)
+// - "—" src equals dst (self-test, not run)
// - "·" no test for this (src,dst,rail) — shared-rail set
// didn't include both endpoints
-// - "✓ 0%" ping passed; packet loss percentage when non-zero,
-// RTT when present (e.g. "✓ 0% 0.5ms")
-// - "✗ 100%" ping failed; "✗ ERR" if the exec itself errored
+// - rping — "✓" / "✗"
+// - ib_write_bw — "✓ 194.4 Gbps" / "✗ ERR"
//
// ANSI color is applied only when uiOutput.IsTTY() — keeps log files
// and CI pipelines free of escape sequences.
@@ -51,34 +50,75 @@ func RenderMatrixText(uiOutput ui.Output, result *MatrixResult) {
}
tty := uiOutput.IsTTY()
- byRail, crossRail, nodesSorted, railsSorted := groupResults(result.PingResults)
+ // Group per (rail, kind family). One grid is rendered for
+ // every (rail, family) bucket that has at least one result, in
+ // the test-execution order so the reader sees rping
+ // (QP-establishment canary) before ib_write_bw (bandwidth).
+ byRail, byCross, nodes, rails := groupResultsByKind(result.PingResults)
- for _, rail := range railsSorted {
- uiOutput.Info("")
- uiOutput.Info("Rail %s:", rail)
- for _, line := range renderRailGrid(nodesSorted, byRail[rail], tty) {
- uiOutput.Info("%s", line)
+ families := []kindFamily{familyRPing, familyIbBw}
+ for _, rail := range rails {
+ for _, fam := range families {
+ grid, ok := byRail[rail][fam]
+ if !ok {
+ continue
+ }
+ uiOutput.Info("")
+ uiOutput.Info("Rail %s — %s:", rail, familyTitle(fam))
+ for _, line := range renderRailGrid(nodes, grid, fam, tty) {
+ uiOutput.Info("%s", line)
+ }
}
}
- if len(crossRail) > 0 {
+ for _, fam := range families {
+ cross, ok := byCross[fam]
+ if !ok || len(cross) == 0 {
+ continue
+ }
uiOutput.Info("")
- uiOutput.Info("Cross-rail canary:")
- for _, line := range renderCrossRailList(crossRail, tty) {
+ uiOutput.Info("Cross-rail canary — %s:", familyTitle(fam))
+ for _, line := range renderCrossRailList(cross, fam, tty) {
uiOutput.Info("%s", line)
}
}
}
-// groupResults indexes ping results by rail and source node for the
-// per-rail grid, plus a flat slice for the cross-rail canary. Returns
-// the deterministic node and rail orderings so callers don't have to
-// sort again. Node names are used as the axis labels because they're
-// what operators recognize ("worker-03" vs the DaemonSet-generated
-// pod suffix like "sriov-test-7t8h9"); the underlying pod name is
-// still carried on the PingTest for the SPDY exec path.
-func groupResults(results []PingResult) (byRail map[string]map[string]map[string]*PingResult, crossRail []*PingResult, nodes []string, rails []string) {
- byRail = map[string]map[string]map[string]*PingResult{}
+// kindFamily collapses the four PingTestKind values down to the two
+// families the renderer cares about: rping and ib_write_bw. The
+// same-rail / cross-rail axis is handled separately (per-rail grid vs
+// cross-rail list).
+type kindFamily int
+
+const (
+ familyRPing kindFamily = iota
+ familyIbBw
+)
+
+func kindFamilyOf(k PingTestKind) kindFamily {
+ if k.IsRDMABw() {
+ return familyIbBw
+ }
+ return familyRPing
+}
+
+func familyTitle(f kindFamily) string {
+ if f == familyIbBw {
+ return "RDMA bandwidth (ib_write_bw)"
+ }
+ return "RDMA ping (rping)"
+}
+
+// groupResultsByKind indexes results by (rail, family, src) → dst →
+// result for the per-rail grids, and by family → []result for the
+// cross-rail canary lists. Returns the deterministic node and rail
+// orderings so callers don't have to sort again. Node names are used
+// as the axis labels because they're what operators recognize
+// ("worker-03" vs the DaemonSet-generated pod suffix); the underlying
+// pod name is still carried on the PingTest for the SPDY exec path.
+func groupResultsByKind(results []PingResult) (byRail map[string]map[kindFamily]map[string]map[string]*PingResult, byCross map[kindFamily][]*PingResult, nodes []string, rails []string) {
+ byRail = map[string]map[kindFamily]map[string]map[string]*PingResult{}
+ byCross = map[kindFamily][]*PingResult{}
nodeSet := map[string]struct{}{}
railSet := map[string]struct{}{}
@@ -88,18 +128,22 @@ func groupResults(results []PingResult) (byRail map[string]map[string]map[string
dst := axisLabel(r.Test.DstNode, r.Test.DstPod)
nodeSet[src] = struct{}{}
nodeSet[dst] = struct{}{}
- if r.Test.Kind == PingCrossRail {
- crossRail = append(crossRail, r)
+ fam := kindFamilyOf(r.Test.Kind)
+ if r.Test.Kind.IsCrossRail() {
+ byCross[fam] = append(byCross[fam], r)
continue
}
if _, ok := byRail[r.Test.Rail]; !ok {
- byRail[r.Test.Rail] = map[string]map[string]*PingResult{}
+ byRail[r.Test.Rail] = map[kindFamily]map[string]map[string]*PingResult{}
railSet[r.Test.Rail] = struct{}{}
}
- if _, ok := byRail[r.Test.Rail][src]; !ok {
- byRail[r.Test.Rail][src] = map[string]*PingResult{}
+ if _, ok := byRail[r.Test.Rail][fam]; !ok {
+ byRail[r.Test.Rail][fam] = map[string]map[string]*PingResult{}
+ }
+ if _, ok := byRail[r.Test.Rail][fam][src]; !ok {
+ byRail[r.Test.Rail][fam][src] = map[string]*PingResult{}
}
- byRail[r.Test.Rail][src][dst] = r
+ byRail[r.Test.Rail][fam][src][dst] = r
}
nodes = make([]string, 0, len(nodeSet))
@@ -130,7 +174,9 @@ func axisLabel(node, pod string) string {
// renderRailGrid produces the lines for one rail's src×dst grid, with
// columns aligned via text/tabwriter. Axis labels are node names
// (with pod-name fallback for endpoints whose NodeName wasn't set).
-func renderRailGrid(nodes []string, table map[string]map[string]*PingResult, tty bool) []string {
+// fam selects per-kind cell formatting (rping shows ✓/✗,
+// ib_write_bw shows ✓ N Gbps or ✗ ERR).
+func renderRailGrid(nodes []string, table map[string]map[string]*PingResult, fam kindFamily, tty bool) []string {
var buf bytes.Buffer
tw := tabwriter.NewWriter(&buf, 0, 0, 2, ' ', 0)
@@ -144,7 +190,7 @@ func renderRailGrid(nodes []string, table map[string]map[string]*PingResult, tty
for _, src := range nodes {
fmt.Fprintf(tw, " %s\t", shortPodName(src))
for _, dst := range nodes {
- fmt.Fprintf(tw, "%s\t", cellFor(src, dst, table[src][dst], tty))
+ fmt.Fprintf(tw, "%s\t", cellFor(src, dst, table[src][dst], fam, tty))
}
fmt.Fprintln(tw)
}
@@ -158,7 +204,7 @@ func renderRailGrid(nodes []string, table map[string]map[string]*PingResult, tty
// asymmetric (srcRail, dstRail) shape neatly. Same node-name fallback
// as the per-rail grid: prefer Pod.Spec.NodeName, fall back to the
// pod name.
-func renderCrossRailList(results []*PingResult, tty bool) []string {
+func renderCrossRailList(results []*PingResult, fam kindFamily, tty bool) []string {
type row struct {
src, dst string
r *PingResult
@@ -184,7 +230,7 @@ func renderCrossRailList(results []*PingResult, tty bool) []string {
left := fmt.Sprintf(" %s [%s]\t→ %s [%s]\t",
shortPodName(row.src), row.r.Test.SrcRail,
shortPodName(row.dst), row.r.Test.DstRail)
- fmt.Fprintf(tw, "%s%s\n", left, cellDetail(row.r, tty))
+ fmt.Fprintf(tw, "%s%s\n", left, cellDetail(row.r, fam, tty))
}
tw.Flush()
return trailingTrim(strings.Split(buf.String(), "\n"))
@@ -193,31 +239,24 @@ func renderCrossRailList(results []*PingResult, tty bool) []string {
// cellFor renders one src×dst grid cell. self-pairs render as "—",
// missing pairs as "·" (the rail set didn't pair these two pods —
// rare; e.g. one pod's multus annotation was missing this rail).
-func cellFor(src, dst string, r *PingResult, tty bool) string {
+func cellFor(src, dst string, r *PingResult, fam kindFamily, tty bool) string {
if src == dst {
return "—"
}
if r == nil {
return "·"
}
- return cellDetail(r, tty)
+ return cellDetail(r, fam, tty)
}
// cellDetail formats a single result into its terse cell representation
-// + optional ANSI color when TTY.
-func cellDetail(r *PingResult, tty bool) string {
- var body string
- switch {
- case r.OK:
- body = fmt.Sprintf("✓ %d%%", r.PacketLoss)
- if r.RTTAvgMs > 0 {
- body = fmt.Sprintf("%s %.1fms", body, r.RTTAvgMs)
- }
- case r.PacketLoss >= 0:
- body = fmt.Sprintf("✗ %d%%", r.PacketLoss)
- default:
- body = "✗ ERR"
- }
+// + optional ANSI color when TTY. The body shape depends on the
+// kind family:
+//
+// - rping: ✓ / ✗
+// - ib_write_bw: ✓ 194.4 Gbps / ✗ ERR
+func cellDetail(r *PingResult, fam kindFamily, tty bool) string {
+ body := cellBody(r, fam)
if tty {
if r.OK {
return "\033[32m" + body + "\033[0m"
@@ -227,6 +266,20 @@ func cellDetail(r *PingResult, tty bool) string {
return body
}
+func cellBody(r *PingResult, fam kindFamily) string {
+ if fam == familyIbBw {
+ if r.OK && r.BandwidthGbps > 0 {
+ return fmt.Sprintf("✓ %.1f Gbps", r.BandwidthGbps)
+ }
+ return "✗ ERR"
+ }
+ // rping
+ if r.OK {
+ return "✓"
+ }
+ return "✗"
+}
+
// shortPodName trims a long pod name to keep grid columns from blowing
// out — DaemonSet pods often have a 5-char random suffix after a long
// app name. We keep the leading portion + the last 5 chars (the hash)
diff --git a/pkg/networkoperatorplugin/connectivity/text_report_test.go b/pkg/networkoperatorplugin/connectivity/text_report_test.go
index f3df1a3..7c34a92 100644
--- a/pkg/networkoperatorplugin/connectivity/text_report_test.go
+++ b/pkg/networkoperatorplugin/connectivity/text_report_test.go
@@ -30,58 +30,75 @@ import (
// the order it was emitted so tests can assert on exact lines.
type captureOutput struct{ lines []string }
-func (c *captureOutput) Info(format string, args ...interface{}) { c.lines = append(c.lines, fmt.Sprintf(format, args...)) }
-func (c *captureOutput) Success(format string, args ...interface{}) { c.lines = append(c.lines, "SUCCESS: "+fmt.Sprintf(format, args...)) }
-func (c *captureOutput) Warning(format string, args ...interface{}) { c.lines = append(c.lines, "WARNING: "+fmt.Sprintf(format, args...)) }
-func (c *captureOutput) Error(format string, args ...interface{}) { c.lines = append(c.lines, "ERROR: "+fmt.Sprintf(format, args...)) }
-func (c *captureOutput) StartProgress(message string) ui.Progress { return &captureProgress{out: c, msg: message} }
-func (c *captureOutput) Header(text string) {}
-func (c *captureOutput) Section(text string) { c.lines = append(c.lines, "SECTION: "+text) }
-func (c *captureOutput) Confirm(string) (bool, error) { return true, nil }
-func (c *captureOutput) IsTTY() bool { return false }
+func (c *captureOutput) Info(format string, args ...interface{}) {
+ c.lines = append(c.lines, fmt.Sprintf(format, args...))
+}
+func (c *captureOutput) Success(format string, args ...interface{}) {
+ c.lines = append(c.lines, "SUCCESS: "+fmt.Sprintf(format, args...))
+}
+func (c *captureOutput) Warning(format string, args ...interface{}) {
+ c.lines = append(c.lines, "WARNING: "+fmt.Sprintf(format, args...))
+}
+func (c *captureOutput) Error(format string, args ...interface{}) {
+ c.lines = append(c.lines, "ERROR: "+fmt.Sprintf(format, args...))
+}
+func (c *captureOutput) StartProgress(message string) ui.Progress {
+ return &captureProgress{out: c, msg: message}
+}
+func (c *captureOutput) Header(text string) {}
+func (c *captureOutput) Section(text string) { c.lines = append(c.lines, "SECTION: "+text) }
+func (c *captureOutput) Confirm(string) (bool, error) { return true, nil }
+func (c *captureOutput) IsTTY() bool { return false }
type captureProgress struct {
out *captureOutput
msg string
}
-func (p *captureProgress) Update(string) {}
-func (p *captureProgress) Success(m string) { p.out.lines = append(p.out.lines, "PROGRESS_OK: "+m) }
-func (p *captureProgress) Fail(m string) { p.out.lines = append(p.out.lines, "PROGRESS_FAIL: "+m) }
+func (p *captureProgress) Update(string) {}
+func (p *captureProgress) Success(m string) { p.out.lines = append(p.out.lines, "PROGRESS_OK: "+m) }
+func (p *captureProgress) Fail(m string) { p.out.lines = append(p.out.lines, "PROGRESS_FAIL: "+m) }
-// Helpers synthesize results with both pod and node names so the
-// renderer's node-label preference is exercised.
+// Helpers synthesize RDMA results so the renderer's per-(rail, family)
+// grouping and node-label fallback are exercised.
-func successResult(src, dst, rail, srcIP, dstIP string, lossPercent int, rttMs float64) PingResult {
+func rpingResult(src, dst, rail string, ok bool) PingResult {
return PingResult{
Test: PingTest{
- Kind: PingSameRail,
+ Kind: RDMAPingSameRail,
SrcPod: src + "-pod",
DstPod: dst + "-pod",
SrcNode: src,
DstNode: dst,
Rail: rail,
- SrcIP: srcIP,
- DstIP: dstIP,
SrcRail: rail,
DstRail: rail,
},
- OK: lossPercent < 100,
- PacketLoss: lossPercent,
- RTTAvgMs: rttMs,
+ OK: ok,
}
}
-func failResult(src, dst, rail string, lossPercent int) PingResult {
- r := successResult(src, dst, rail, "", "", lossPercent, 0)
- r.OK = false
- return r
+func ibBwResult(src, dst, rail string, ok bool, bwGbps float64) PingResult {
+ return PingResult{
+ Test: PingTest{
+ Kind: RDMABwSameRail,
+ SrcPod: src + "-pod",
+ DstPod: dst + "-pod",
+ SrcNode: src,
+ DstNode: dst,
+ Rail: rail,
+ SrcRail: rail,
+ DstRail: rail,
+ },
+ OK: ok,
+ BandwidthGbps: bwGbps,
+ }
}
-func crossResult(src, dst, srcRail, dstRail string, ok bool) PingResult {
+func crossRpingResult(src, dst, srcRail, dstRail string, ok bool) PingResult {
return PingResult{
Test: PingTest{
- Kind: PingCrossRail,
+ Kind: RDMAPingCrossRail,
SrcPod: src + "-pod",
DstPod: dst + "-pod",
SrcNode: src,
@@ -90,23 +107,24 @@ func crossResult(src, dst, srcRail, dstRail string, ok bool) PingResult {
SrcRail: srcRail,
DstRail: dstRail,
},
- OK: ok,
- PacketLoss: 0,
+ OK: ok,
}
}
func TestRenderMatrixText_RailGridAndCrossRail(t *testing.T) {
result := &MatrixResult{
PingResults: []PingResult{
- // Rail rail-0: 2 nodes, all green
- successResult("worker-1", "worker-2", "rail-0", "10.0.0.1", "10.0.0.2", 0, 0.5),
- successResult("worker-2", "worker-1", "rail-0", "10.0.0.2", "10.0.0.1", 0, 0.6),
- // Rail rail-1: one direction fails
- successResult("worker-1", "worker-2", "rail-1", "10.0.1.1", "10.0.1.2", 0, 0.7),
- failResult("worker-2", "worker-1", "rail-1", 100),
+ // Rail rail-0: 2 nodes, both rping pass
+ rpingResult("worker-1", "worker-2", "rail-0", true),
+ rpingResult("worker-2", "worker-1", "rail-0", true),
+ ibBwResult("worker-1", "worker-2", "rail-0", true, 194.4),
+ ibBwResult("worker-2", "worker-1", "rail-0", true, 193.8),
+ // Rail rail-1: one rping direction fails
+ rpingResult("worker-1", "worker-2", "rail-1", true),
+ rpingResult("worker-2", "worker-1", "rail-1", false),
// Cross-rail canaries
- crossResult("worker-1", "worker-2", "rail-0", "rail-1", true),
- crossResult("worker-2", "worker-1", "rail-0", "rail-1", false),
+ crossRpingResult("worker-1", "worker-2", "rail-0", "rail-1", true),
+ crossRpingResult("worker-2", "worker-1", "rail-0", "rail-1", false),
},
}
@@ -115,21 +133,25 @@ func TestRenderMatrixText_RailGridAndCrossRail(t *testing.T) {
joined := strings.Join(out.lines, "\n")
- // Sanity: both rails rendered.
- assert.Contains(t, joined, "Rail rail-0:")
- assert.Contains(t, joined, "Rail rail-1:")
+ // Sanity: both rails rendered. The kind family appears in the
+ // header so we look for the "Rail <rail> — RDMA ping" prefix.
+ assert.Contains(t, joined, "Rail rail-0 — RDMA ping (rping):")
+ assert.Contains(t, joined, "Rail rail-1 — RDMA ping (rping):")
+ assert.Contains(t, joined, "Rail rail-0 — RDMA bandwidth (ib_write_bw):")
// Cross-rail section rendered.
- assert.Contains(t, joined, "Cross-rail canary:")
+ assert.Contains(t, joined, "Cross-rail canary — RDMA ping (rping):")
// Header row of the grid.
assert.Contains(t, joined, "src \\ dst")
// Axis labels must be node names, NOT pod names.
assert.Contains(t, joined, "worker-1")
assert.Contains(t, joined, "worker-2")
assert.NotContains(t, joined, "worker-1-pod")
- // Sample cells: green ✓ in non-TTY mode is just plain text.
- assert.Contains(t, joined, "✓ 0% 0.5ms")
- // Failure cell shows ✗ with packet loss.
- assert.Contains(t, joined, "✗ 100%")
+ // Sample cells: rping ✓ in non-TTY mode is just plain text.
+ assert.Contains(t, joined, "✓")
+ // ib_write_bw cell shows formatted Gbps.
+ assert.Contains(t, joined, "194.4 Gbps")
+ // Failure cell shows ✗ for rping.
+ assert.Contains(t, joined, "✗")
// Self-pairs are dashes.
selfDash := 0
for _, l := range out.lines {
@@ -169,27 +191,46 @@ func TestShortPodName(t *testing.T) {
func TestCellFor(t *testing.T) {
t.Run("self pair is dash", func(t *testing.T) {
- assert.Equal(t, "—", cellFor("pod-a", "pod-a", nil, false))
+ assert.Equal(t, "—", cellFor("pod-a", "pod-a", nil, familyRPing, false))
})
t.Run("missing result is bullet", func(t *testing.T) {
- assert.Equal(t, "·", cellFor("pod-a", "pod-b", nil, false))
+ assert.Equal(t, "·", cellFor("pod-a", "pod-b", nil, familyRPing, false))
+ })
+ t.Run("rping OK is bare checkmark", func(t *testing.T) {
+ r := PingResult{OK: true}
+ assert.Equal(t, "✓", cellFor("a", "b", &r, familyRPing, false))
+ })
+ t.Run("rping fail is bare X", func(t *testing.T) {
+ r := PingResult{OK: false}
+ assert.Equal(t, "✗", cellFor("a", "b", &r, familyRPing, false))
})
- t.Run("OK result formats with loss + rtt", func(t *testing.T) {
- r := PingResult{OK: true, PacketLoss: 0, RTTAvgMs: 1.23}
- assert.Equal(t, "✓ 0% 1.2ms", cellFor("a", "b", &r, false))
+ t.Run("ib_write_bw OK shows Gbps", func(t *testing.T) {
+ r := PingResult{OK: true, BandwidthGbps: 194.39}
+ assert.Equal(t, "✓ 194.4 Gbps", cellFor("a", "b", &r, familyIbBw, false))
})
- t.Run("partial loss is failure", func(t *testing.T) {
- r := PingResult{OK: false, PacketLoss: 33}
- assert.Equal(t, "✗ 33%", cellFor("a", "b", &r, false))
+ t.Run("ib_write_bw fail with no bandwidth is ERR", func(t *testing.T) {
+ r := PingResult{OK: false}
+ assert.Equal(t, "✗ ERR", cellFor("a", "b", &r, familyIbBw, false))
})
- t.Run("exec error has no loss reading", func(t *testing.T) {
- r := PingResult{OK: false, PacketLoss: -1}
- assert.Equal(t, "✗ ERR", cellFor("a", "b", &r, false))
+ t.Run("ib_write_bw OK but zero bandwidth is ERR", func(t *testing.T) {
+ // Defensive: OK=true with zero bandwidth shouldn't happen
+ // in practice, but if it does the cell renders as ERR
+ // since a "0 Gbps" link is broken even if perftest exited
+ // cleanly.
+ r := PingResult{OK: true, BandwidthGbps: 0}
+ assert.Equal(t, "✗ ERR", cellFor("a", "b", &r, familyIbBw, false))
})
- t.Run("TTY mode wraps in ANSI green", func(t *testing.T) {
- r := PingResult{OK: true, PacketLoss: 0}
- got := cellFor("a", "b", &r, true)
+ t.Run("TTY mode wraps in ANSI green for OK", func(t *testing.T) {
+ r := PingResult{OK: true}
+ got := cellFor("a", "b", &r, familyRPing, true)
assert.Contains(t, got, "\033[32m")
assert.Contains(t, got, "\033[0m")
})
}
+
+func TestKindFamilyOf(t *testing.T) {
+ assert.Equal(t, familyRPing, kindFamilyOf(RDMAPingSameRail))
+ assert.Equal(t, familyRPing, kindFamilyOf(RDMAPingCrossRail))
+ assert.Equal(t, familyIbBw, kindFamilyOf(RDMABwSameRail))
+ assert.Equal(t, familyIbBw, kindFamilyOf(RDMABwCrossRail))
+}
diff --git a/pkg/networkoperatorplugin/crstate/registry.go b/pkg/networkoperatorplugin/crstate/registry.go
index 027c5aa..7c157e0 100644
--- a/pkg/networkoperatorplugin/crstate/registry.go
+++ b/pkg/networkoperatorplugin/crstate/registry.go
@@ -66,3 +66,45 @@ func (r *Registry) Validate(ctx context.Context, c client.Client, obj *unstructu
}
return DefaultExistenceValidator(ctx, c, obj)
}
+
+// NeedsObservationGate reports whether a CR's authoritative status
+// lives on the object itself (vs on a companion CR). The deploy
+// state machine uses this to decide whether the post-apply
+// resourceVersion bump is a meaningful "controller has reacted"
+// signal:
+//
+// - true — validator reads `obj.status.*` directly (NCP/NNP/the
+// three Mellanox Network Kinds, SpectrumXRailPoolConfig
+// v1alpha2). Until the controller writes status back the RV
+// stays at the apply-time value, and the validator would return
+// stale data from the previous reconcile. Gate the verdict
+// until live RV moves past apply-time RV.
+//
+// - false — validator reads companion CRs whose lifecycle is
+// independent of the apply (SriovNetworkNodePolicy →
+// SriovNetworkNodeState per node; NicInterfaceNameTemplate /
+// NicConfigurationTemplate → NicDevice per device). The
+// companion's RV evolves on its own schedule and the
+// SriovNetworkNodePolicy itself never gets a status write, so
+// gating on its RV would block forever. Validators in this
+// bucket get to give an immediate verdict — staleness is their
+// own problem to detect (and the SR-IOV validator does, by
+// bucketing the "Succeeded but numVfs not at target" case as
+// in-progress per the SR-IOV soft-progress rule).
+//
+// IPPool / CIDRPool / SriovNetwork / SriovIBNetwork / OVSNetwork
+// fall through to the default existence-only validator and also
+// don't need the gate — there's no status to read at all.
+func NeedsObservationGate(gvk schema.GroupVersionKind) bool {
+ if gvk.Group == "mellanox.com" && gvk.Version == "v1alpha1" {
+ switch gvk.Kind {
+ case "NicClusterPolicy", "NicNodePolicy",
+ "HostDeviceNetwork", "IPoIBNetwork", "MacvlanNetwork":
+ return true
+ }
+ }
+ if gvk.Group == spcxGroup && gvk.Version == spcxVersionAlpha2 && gvk.Kind == spcxKindRailPoolConfig {
+ return true
+ }
+ return false
+}
diff --git a/pkg/networkoperatorplugin/crstate/registry_test.go b/pkg/networkoperatorplugin/crstate/registry_test.go
index 160931f..b8f4514 100644
--- a/pkg/networkoperatorplugin/crstate/registry_test.go
+++ b/pkg/networkoperatorplugin/crstate/registry_test.go
@@ -190,6 +190,42 @@ func TestStatusStringValidator_RegisteredKinds(t *testing.T) {
}
}
+func TestNeedsObservationGate(t *testing.T) {
+ cases := []struct {
+ gvk schema.GroupVersionKind
+ want bool
+ name string
+ }{
+ // Kinds whose validators read .status on the CR itself —
+ // gate is meaningful, controller's RV bump is the signal.
+ {schema.GroupVersionKind{Group: "mellanox.com", Version: "v1alpha1", Kind: "NicClusterPolicy"}, true, "NicClusterPolicy"},
+ {schema.GroupVersionKind{Group: "mellanox.com", Version: "v1alpha1", Kind: "NicNodePolicy"}, true, "NicNodePolicy"},
+ {schema.GroupVersionKind{Group: "mellanox.com", Version: "v1alpha1", Kind: "HostDeviceNetwork"}, true, "HostDeviceNetwork"},
+ {schema.GroupVersionKind{Group: "mellanox.com", Version: "v1alpha1", Kind: "IPoIBNetwork"}, true, "IPoIBNetwork"},
+ {schema.GroupVersionKind{Group: "mellanox.com", Version: "v1alpha1", Kind: "MacvlanNetwork"}, true, "MacvlanNetwork"},
+ {schema.GroupVersionKind{Group: "spectrumx.nvidia.com", Version: "v1alpha2", Kind: "SpectrumXRailPoolConfig"}, true, "SpectrumXRailPoolConfig"},
+
+ // Kinds whose validators read companion CRs — gate would
+ // block forever waiting for an RV bump that never lands.
+ {schema.GroupVersionKind{Group: "sriovnetwork.openshift.io", Version: "v1", Kind: "SriovNetworkNodePolicy"}, false, "SriovNetworkNodePolicy (companion = SriovNetworkNodeState)"},
+ {schema.GroupVersionKind{Group: "configuration.net.nvidia.com", Version: "v1alpha1", Kind: "NicInterfaceNameTemplate"}, false, "NicInterfaceNameTemplate (companion = NicDevice)"},
+ {schema.GroupVersionKind{Group: "configuration.net.nvidia.com", Version: "v1alpha1", Kind: "NicConfigurationTemplate"}, false, "NicConfigurationTemplate (companion = NicDevice)"},
+
+ // Existence-only Kinds — no status at all, gate irrelevant.
+ {schema.GroupVersionKind{Group: "nv-ipam.nvidia.com", Version: "v1alpha1", Kind: "IPPool"}, false, "IPPool"},
+ {schema.GroupVersionKind{Group: "sriovnetwork.openshift.io", Version: "v1", Kind: "SriovNetwork"}, false, "SriovNetwork"},
+
+ // Wrong group / version — never gate.
+ {schema.GroupVersionKind{Group: "mellanox.com", Version: "v1beta1", Kind: "NicClusterPolicy"}, false, "wrong version"},
+ {schema.GroupVersionKind{Group: "other.example.com", Version: "v1", Kind: "NicClusterPolicy"}, false, "wrong group"},
+ }
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ assert.Equal(t, tc.want, NeedsObservationGate(tc.gvk))
+ })
+ }
+}
+
// node returns a labelled *corev1.Node for fake-client seeding.
func node(name string, labels map[string]string) *corev1.Node {
n := &corev1.Node{}
diff --git a/pkg/networkoperatorplugin/deploy.go b/pkg/networkoperatorplugin/deploy.go
index 37a181a..3b6374e 100644
--- a/pkg/networkoperatorplugin/deploy.go
+++ b/pkg/networkoperatorplugin/deploy.go
@@ -191,7 +191,7 @@ func ApplyManifestsFromDir(ctx context.Context, kubeClient client.Client, manife
}
am := appliedManifest{obj: obj}
- if obj.GetGeneration() > preApplyGen {
+ if obj.GetGeneration() > preApplyGen && crstate.NeedsObservationGate(obj.GroupVersionKind()) {
am.awaitObservationAfterRV = obj.GetResourceVersion()
}
appliedOthers = append(appliedOthers, am)
@@ -331,11 +331,12 @@ func applyAndWait(ctx context.Context, c client.Client, registry *crstate.Regist
// applyUnstructured does an SSA Patch that returns the
// server-decided object on `obj`. Its current resourceVersion
- // is "the version right after our apply". If the spec changed,
- // any subsequent live RV != this value means the controller
- // has written status since.
+ // is "the version right after our apply". If the spec changed
+ // AND this Kind's validator reads .status from the CR itself
+ // (vs from companion CRs), any subsequent live RV != this
+ // value means the controller has written status since.
awaitObservationAfterRV := ""
- if obj.GetGeneration() > preApplyGen {
+ if obj.GetGeneration() > preApplyGen && crstate.NeedsObservationGate(obj.GroupVersionKind()) {
awaitObservationAfterRV = obj.GetResourceVersion()
log.Log.V(1).Info("Spec changed; gating poll on controller observation",
"kind", obj.GetKind(), "name", obj.GetName(),
diff --git a/pkg/networkoperatorplugin/discovery.go b/pkg/networkoperatorplugin/discovery.go
index 7ce73c6..04591a4 100644
--- a/pkg/networkoperatorplugin/discovery.go
+++ b/pkg/networkoperatorplugin/discovery.go
@@ -22,7 +22,6 @@ import (
"fmt"
"slices"
"sort"
- "strconv"
"strings"
"time"
@@ -32,6 +31,7 @@ import (
"github.com/nvidia/k8s-launch-kit/pkg/config"
"github.com/nvidia/k8s-launch-kit/pkg/kubeclient"
"github.com/nvidia/k8s-launch-kit/pkg/networkoperatorplugin/internal/pciids"
+ "github.com/nvidia/k8s-launch-kit/pkg/presetmatch"
"github.com/nvidia/k8s-launch-kit/pkg/presets"
"github.com/nvidia/k8s-launch-kit/pkg/ui"
corev1 "k8s.io/api/core/v1"
@@ -295,55 +295,44 @@ func (p *NetworkOperatorPlugin) DiscoverClusterConfig(ctx context.Context, c cli
discoverGroupFabric(ctx, p.RESTConfig,
defaultConfig.NetworkOperator.Namespace, group, dsPods)
- // Try to enrich with a predefined topology preset for this (machine,
- // GPU) pair. Presets provide authoritative traffic classification,
- // rail assignments, and NUMA/GPU topology metadata for known
- // hardware configurations. Lookup is exact-match on (machineType,
+ // Try to enrich with a predefined topology preset for this
+ // (machine, GPU) pair. presetmatch.MatchGroup runs the
+ // shared lookup + deviation comparison (also used by
+ // `l8k validate`); the discovery path then additionally
+ // applies the preset onto the group so rail/NUMA topology
+ // fields populate. Lookup is exact-match on (machineType,
// gpuType) — both must be known for a preset to apply.
- if group.MachineType != "" && group.GPUType != "" {
- log.Log.V(1).Info("Looking up preset by (machineType, gpuType)",
- "group", group.Identifier,
- "machineType", group.MachineType,
- "gpuType", group.GPUType)
- preset, presetErr := presets.LoadPreset(group.MachineType, group.GPUType)
- if presetErr != nil {
- log.Log.Error(presetErr, "failed to load preset",
- "machineType", group.MachineType, "gpuType", group.GPUType)
- uiOutput.Warning("Failed to load preset for %s/%s: %v",
- group.MachineType, group.GPUType, presetErr)
- } else if preset == nil {
- log.Log.V(1).Info("No preset matched (machineType, gpuType)",
- "group", group.Identifier,
- "machineType", group.MachineType,
- "gpuType", group.GPUType)
- } else {
- // Always apply the matched preset on a best-effort basis.
- // Any discrepancies (PF count, PCI address drift,
- // device-ID drift) are recorded as soft deviations and
- // re-warned about on every subsequent config load.
- deviations := presets.ValidatePreset(preset, group.PFs)
+ matchResult := presetmatch.MatchGroup(*group)
+ log.Log.V(1).Info("Preset match",
+ "group", group.Identifier,
+ "machineType", group.MachineType,
+ "gpuType", group.GPUType,
+ "status", string(matchResult.Status),
+ "presetName", matchResult.PresetName,
+ "deviationCount", len(matchResult.Deviations))
+ switch matchResult.Status {
+ case presetmatch.StatusMatch, presetmatch.StatusDeviation:
+ // LoadPreset was successful — load it again to get
+ // the Topology so ApplyPreset can enrich the group.
+ // (MatchGroup intentionally doesn't mutate.)
+ if preset, err := presets.LoadPreset(group.MachineType, group.GPUType); err == nil && preset != nil {
presets.ApplyPreset(preset, group)
- log.Log.V(1).Info("Preset matched and applied",
- "group", group.Identifier,
- "machineType", group.MachineType,
- "gpuType", group.GPUType,
- "presetPFCount", len(preset.PFs),
- "discoveredPFCount", len(group.PFs),
- "deviationCount", len(deviations))
- if len(deviations) > 0 {
- group.PresetDeviation = deviations
- log.Log.Info("Preset applied with deviations from matched preset",
- "group", group.Identifier,
- "machineType", group.MachineType,
- "gpuType", group.GPUType,
- "deviationCount", len(deviations))
- uiOutput.Warning(
- "Preset for %s/%s applied with %d deviation(s) from the matched preset. The deployment is not certified — see 'presetDeviation' in cluster-config.yaml.",
- group.MachineType, group.GPUType, len(deviations))
- } else {
- uiOutput.Info("Applied preset configuration for %s", group.MachineType)
- }
}
+ if matchResult.Status == presetmatch.StatusDeviation {
+ group.PresetDeviation = matchResult.Deviations
+ uiOutput.Warning(
+ "Preset for %s/%s applied with %d deviation(s) from the matched preset. The deployment is not certified — see 'presetDeviation' in cluster-config.yaml.",
+ group.MachineType, group.GPUType, len(matchResult.Deviations))
+ } else {
+ uiOutput.Info("Applied preset configuration for %s", group.MachineType)
+ }
+ case presetmatch.StatusNotFound:
+ // No catalog entry — discovery continues without
+ // preset enrichment. Logged at V(1) only; not a
+ // user-actionable warning.
+ case presetmatch.StatusSkipped:
+ // machineType / gpuType wasn't discovered. Discovery
+ // already logs this via the hardware-type probes.
}
modules, err := discoverThirdPartyRDMAModules(ctx, p.RESTConfig,
@@ -1360,16 +1349,21 @@ func truncateForLog(s string, maxLen int) string {
// discoverGroupFabric probes the InfiniBand sysfs entries on a representative
// daemon pod for every east-west PF in `group` that has an RdmaDevice and,
-// when the per-port verdicts unanimously agree on a confirmed value, sets
+// when the per-port verdicts unanimously agree on a value, sets
// `group.LinkType`. Otherwise the field is left empty — discovery couldn't
-// prove the cluster is using a specific fabric, and downstream code treats
-// absence as "unknown".
+// determine the fabric type, and downstream code treats absence as
+// "unknown".
//
-// "Confirmed" means the port is ACTIVE and (for InfiniBand) a subnet
-// manager is present (sm_lid != 0). Anything else — port down, IB without
-// SM, malformed sysfs output — yields no contribution to the group's
-// verdict. Reading link_layer alone would be unreliable: that file just
-// reflects firmware config and may be a default the cluster doesn't use.
+// The verdict is the port's configured `link_layer` (sysfs file) — what
+// the firmware says the port is wired for, regardless of whether the
+// netdev is currently up. We deliberately ignore `state` (ACTIVE vs
+// DOWN) and `sm_lid` (IB subnet manager presence) here: requiring an
+// active link broke discovery on freshly-provisioned clusters where
+// the switch wasn't yet plugged in, and the configured link_layer is
+// what every downstream template needs anyway. An operator who
+// reflashes a card to a different fabric needs to re-run discover, but
+// that's the only failure mode we accept in exchange for the
+// "discover before the cluster is wired up" win.
//
// Multi-node groups whose RdmaDevice is empty (per the existing
// per-node-vs-group safety rule) skip the probe — there's no ibdev name
@@ -1391,7 +1385,7 @@ func discoverGroupFabric(ctx context.Context, restConfig *rest.Config,
containerName = targetPod.Spec.Containers[0].Name
}
- verdicts := map[string]int{} // confirmed linkType -> count
+ verdicts := map[string]int{} // linkType -> count of contributing PFs
probed := 0
for _, pf := range group.PFs {
if pf.Traffic != "east-west" || pf.RdmaDevice == "" {
@@ -1429,7 +1423,7 @@ func discoverGroupFabric(ctx context.Context, restConfig *rest.Config,
"group", group.Identifier,
"linkType", k,
"probedPFs", probed,
- "confirmedPFs", verdicts[k])
+ "contributingPFs", verdicts[k])
}
case len(verdicts) > 1:
log.Log.V(1).Info("Group fabric ambiguous (probes disagree); leaving linkType unset",
@@ -1437,126 +1431,65 @@ func discoverGroupFabric(ctx context.Context, restConfig *rest.Config,
"probedPFs", probed,
"verdicts", verdicts)
default:
- log.Log.V(1).Info("Group fabric unconfirmed (no port produced a confirmed verdict); leaving linkType unset",
+ log.Log.V(1).Info("Group fabric unresolved (no port reported a recognised link_layer); leaving linkType unset",
"group", group.Identifier,
"probedPFs", probed)
}
}
// discoverPortFabric reads
-// /sys/class/infiniband/<rdmaDevice>/ports/<port>/{state,phys_state,link_layer,sm_lid}
-// inside the daemon pod via a single shell exec and returns the confirmed
-// fabric for that port (empty when the port could not produce a confirmed
-// verdict). rawSummary is a short human-readable joined version of the
-// four sysfs values for debug logs.
+// /sys/class/infiniband/<rdmaDevice>/ports/<port>/link_layer inside the
+// daemon pod and returns the configured fabric for that port —
+// "Ethernet", "InfiniBand", or "" when the file is empty / unreadable /
+// unrecognised. The port's runtime state (ACTIVE / DOWN) is
+// intentionally NOT consulted: discovery has to work on freshly
+// provisioned clusters where the switch isn't yet plugged in.
//
// Tries `/sys/class/infiniband/...` first (works when the daemon pod
// shares host pid+net namespace and exposes the host sysfs at /sys),
// then falls back to `/host/sys/class/infiniband/...` for daemons that
// mount the host filesystem under /host (matches consts.HostPath =
-// "/host" used by the rest of nic-configuration-operator). Stderr is
-// captured rather than swallowed so a failed read surfaces in the
-// debug log instead of producing a silent empty verdict.
+// "/host" used by the rest of nic-configuration-operator). The first
+// path that yields a recognised link_layer wins.
func discoverPortFabric(ctx context.Context, restConfig *rest.Config,
namespace, podName, containerName, rdmaDevice string, port int) (string, string, error) {
+ var lastErr error
for _, base := range []string{
- fmt.Sprintf("/sys/class/infiniband/%s/ports/%d", rdmaDevice, port),
- fmt.Sprintf("/host/sys/class/infiniband/%s/ports/%d", rdmaDevice, port),
+ fmt.Sprintf("/sys/class/infiniband/%s/ports/%d/link_layer", rdmaDevice, port),
+ fmt.Sprintf("/host/sys/class/infiniband/%s/ports/%d/link_layer", rdmaDevice, port),
} {
- cmd := fmt.Sprintf(
- "echo state=$(cat %s/state); "+
- "echo phys_state=$(cat %s/phys_state); "+
- "echo link_layer=$(cat %s/link_layer); "+
- "echo sm_lid=$(cat %s/sm_lid)",
- base, base, base, base)
+ cmd := fmt.Sprintf("cat %s", base)
output, err := execInPod(ctx, restConfig, namespace, podName, containerName,
[]string{"/bin/sh", "-c", cmd})
- // Even on exec error we attempt to parse — `cat` returns
- // non-zero for missing files but the SPDY executor still
- // captures the stdout from earlier successful echoes.
+ if err != nil {
+ lastErr = err
+ log.Log.V(1).Info("Fabric port probe: read failed at this base",
+ "rdmaDevice", rdmaDevice, "port", port, "base", base,
+ "execErr", err.Error())
+ continue
+ }
linkType, raw := parsePortFabricVerdict(output)
if linkType != "" {
return linkType, raw, nil
}
- // First-path miss: log the raw read so an operator can see
- // what came back, then try the next base path. We only
- // surface the final error (if any) to the caller.
- log.Log.V(1).Info("Fabric port probe: empty/unconfirmed at this base",
+ log.Log.V(1).Info("Fabric port probe: link_layer at this base not recognised",
"rdmaDevice", rdmaDevice, "port", port, "base", base,
- "raw", raw, "execErr", errString(err))
- if err == nil && raw != "" {
- // Got a clean read that simply didn't meet the
- // confirmation criteria (port DOWN, IB without
- // SM, etc.). No point trying the other base —
- // the port really isn't in a usable state.
- return "", raw, nil
- }
+ "raw", raw)
}
- return "", "", fmt.Errorf("no readable sysfs at either /sys/class/infiniband/%s or /host/sys/class/infiniband/%s",
- rdmaDevice, rdmaDevice)
-}
-
-// errString returns err.Error() or "" when err is nil — used in the
-// V(1) probe log so we don't get the literal "" sentinel.
-func errString(err error) string {
- if err == nil {
- return ""
+ if lastErr != nil {
+ return "", "", lastErr
}
- return err.Error()
+ return "", "", nil
}
-// parsePortFabricVerdict converts the four-line "key=value" output of the
-// sysfs probe into a confirmed fabric verdict (or empty when no
-// confirmation is possible).
-//
-// Confirmation rule:
-// - Active + InfiniBand + sm_lid != 0 → "InfiniBand".
-// - Active + Ethernet → "Ethernet".
-// - Anything else → "" (no confirmation; caller
-// leaves group.LinkType unset).
-//
-// Active means the state file matches "ACTIVE" (case-insensitive); the
-// kernel formats it as "4: ACTIVE", "1: DOWN", etc.
+// parsePortFabricVerdict normalises a sysfs `link_layer` read into the
+// l8k vocabulary ("Ethernet" / "InfiniBand"). output is the `cat`
+// result for that file — typically the value plus a trailing newline
+// ("Ethernet\n"), or empty when the file didn't exist. raw is the
+// trimmed input echoed back for debug-log breadcrumbs.
func parsePortFabricVerdict(output string) (linkType, raw string) {
- fields := map[string]string{}
- for _, line := range strings.Split(output, "\n") {
- eq := strings.IndexByte(line, '=')
- if eq < 0 {
- continue
- }
- fields[strings.TrimSpace(line[:eq])] = strings.TrimSpace(line[eq+1:])
- }
- state := fields["state"]
- linkLayer := normalizeLinkLayer(fields["link_layer"])
- smLid := fields["sm_lid"]
-
- raw = fmt.Sprintf("state=%q phys_state=%q link_layer=%q sm_lid=%q",
- state, fields["phys_state"], fields["link_layer"], smLid)
-
- active := strings.Contains(strings.ToUpper(state), "ACTIVE")
- hasSM := smLidIsNonZero(smLid)
-
- switch {
- case active && linkLayer == "InfiniBand" && hasSM:
- return "InfiniBand", raw
- case active && linkLayer == "Ethernet":
- return "Ethernet", raw
- default:
- return "", raw
- }
-}
-
-// smLidIsNonZero parses a sysfs `sm_lid` value (e.g. "0", "0x0", "0x0000",
-// "0x0001") as an unsigned integer and returns true when the value is
-// strictly greater than zero. Kernel versions disagree on the format —
-// some emit decimal, some emit hex — so we accept both via auto-base
-// (base=0 in strconv.ParseUint).
-func smLidIsNonZero(s string) bool {
- v, err := strconv.ParseUint(strings.TrimSpace(s), 0, 32)
- if err != nil {
- return false
- }
- return v != 0
+ raw = strings.TrimSpace(output)
+ return normalizeLinkLayer(raw), raw
}
// normalizeLinkLayer canonicalises sysfs link_layer strings to the YAML
diff --git a/pkg/networkoperatorplugin/discovery_test.go b/pkg/networkoperatorplugin/discovery_test.go
index 305c678..449c162 100644
--- a/pkg/networkoperatorplugin/discovery_test.go
+++ b/pkg/networkoperatorplugin/discovery_test.go
@@ -432,46 +432,43 @@ func TestKnownStorageModules_MatchesMofedmodules(t *testing.T) {
// --- parsePortFabricVerdict tests ---
-func TestParsePortFabricVerdict_ConfirmedInfiniBand(t *testing.T) {
- out := "state=4: ACTIVE\nphys_state=5: LinkUp\nlink_layer=InfiniBand\nsm_lid=0x0001\n"
- linkType, raw := parsePortFabricVerdict(out)
+func TestParsePortFabricVerdict_InfiniBand(t *testing.T) {
+ linkType, raw := parsePortFabricVerdict("InfiniBand\n")
assert.Equal(t, "InfiniBand", linkType)
- assert.Contains(t, raw, "sm_lid=\"0x0001\"")
+ assert.Equal(t, "InfiniBand", raw)
}
-func TestParsePortFabricVerdict_ConfirmedEthernet(t *testing.T) {
- // Ethernet ports don't need an SM; ACTIVE + Ethernet alone is enough.
- out := "state=4: ACTIVE\nphys_state=5: LinkUp\nlink_layer=Ethernet\nsm_lid=0x0000\n"
- linkType, _ := parsePortFabricVerdict(out)
+func TestParsePortFabricVerdict_Ethernet(t *testing.T) {
+ linkType, raw := parsePortFabricVerdict("Ethernet\n")
assert.Equal(t, "Ethernet", linkType)
+ assert.Equal(t, "Ethernet", raw)
}
-func TestParsePortFabricVerdict_UnverifiedIB_NoSM_ReturnsEmpty(t *testing.T) {
- // Active IB port without a subnet manager — we can't confirm the
- // cluster is using IB. Verdict is empty (caller leaves group.LinkType
- // unset).
- out := "state=4: ACTIVE\nphys_state=5: LinkUp\nlink_layer=InfiniBand\nsm_lid=0x0000\n"
- linkType, _ := parsePortFabricVerdict(out)
- assert.Equal(t, "", linkType)
+func TestParsePortFabricVerdict_DownPortStillResolvesByLinkLayer(t *testing.T) {
+ // Old behaviour required ACTIVE state; the new probe reads
+ // only the configured link_layer file, which gives a verdict
+ // regardless of runtime state. This is what unblocks
+ // discovery on freshly provisioned clusters where the switch
+ // isn't plugged in yet.
+ linkType, _ := parsePortFabricVerdict("Ethernet\n")
+ assert.Equal(t, "Ethernet", linkType)
}
-func TestParsePortFabricVerdict_DownPort_ReturnsEmpty(t *testing.T) {
- // Port not ACTIVE — no confirmation possible.
- out := "state=1: DOWN\nphys_state=3: Disabled\nlink_layer=Ethernet\nsm_lid=0x0000\n"
- linkType, _ := parsePortFabricVerdict(out)
+func TestParsePortFabricVerdict_EmptyOutput(t *testing.T) {
+ linkType, raw := parsePortFabricVerdict("")
assert.Equal(t, "", linkType)
+ assert.Equal(t, "", raw)
}
-func TestParsePortFabricVerdict_EmptyOutput(t *testing.T) {
- linkType, _ := parsePortFabricVerdict("")
+func TestParsePortFabricVerdict_UnrecognisedValue(t *testing.T) {
+ linkType, raw := parsePortFabricVerdict("Foo\n")
assert.Equal(t, "", linkType)
+ assert.Equal(t, "Foo", raw)
}
-func TestParsePortFabricVerdict_PartialOutput(t *testing.T) {
- // link_layer line missing — no verdict.
- out := "state=4: ACTIVE\nphys_state=5: LinkUp\nsm_lid=0x0001\n"
- linkType, _ := parsePortFabricVerdict(out)
- assert.Equal(t, "", linkType)
+func TestParsePortFabricVerdict_TrimsWhitespace(t *testing.T) {
+ linkType, _ := parsePortFabricVerdict(" Ethernet \n")
+ assert.Equal(t, "Ethernet", linkType)
}
func TestNormalizeLinkLayer(t *testing.T) {
diff --git a/pkg/networkoperatorplugin/validate.go b/pkg/networkoperatorplugin/validate.go
index d13f26f..ad2d604 100644
--- a/pkg/networkoperatorplugin/validate.go
+++ b/pkg/networkoperatorplugin/validate.go
@@ -62,7 +62,7 @@ type ValidationResult struct {
Details map[string]string
// LiveYAML is the cluster's view of the object, marshalled
- // back to YAML for the verify-report's expandable "Live YAML"
+ // back to YAML for the validation report's expandable "Live YAML"
// dropdown. Empty when the object isn't present
// (StateNotDeployed) or when the post-validate fetch failed.
// Managed-fields / status are kept; we want the operator to
diff --git a/pkg/presetmatch/presetmatch.go b/pkg/presetmatch/presetmatch.go
new file mode 100644
index 0000000..3f204f5
--- /dev/null
+++ b/pkg/presetmatch/presetmatch.go
@@ -0,0 +1,176 @@
+// Copyright 2026 NVIDIA CORPORATION & AFFILIATES
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+// Package presetmatch is the single entry point for comparing a
+// discovered cluster group against the topology-preset catalog.
+//
+// Both `l8k discover` (at discovery time, when preset data is also
+// applied into the group via presets.ApplyPreset) and `l8k validate`
+// (at validation time, to confirm that the cluster's recorded hardware
+// still matches the certified preset) call MatchGroup / MatchAll
+// here. Keeping the lookup logic in one place means new lookup
+// behaviour (fuzzy matching, preset-version awareness, …) lands in
+// one file and both call sites pick it up.
+//
+// MatchGroup does not mutate the input group. Callers that want to
+// also enrich the group's transient topology fields call
+// presets.ApplyPreset themselves (typically only the discovery path
+// does that).
+package presetmatch
+
+import (
+ "fmt"
+
+ "github.com/nvidia/k8s-launch-kit/pkg/config"
+ "github.com/nvidia/k8s-launch-kit/pkg/presets"
+)
+
+// Status enumerates the outcomes of a per-group preset lookup.
+type Status string
+
+const (
+ // StatusMatch — preset found and every comparison field
+ // matches the discovered hardware exactly.
+ StatusMatch Status = "match"
+ // StatusDeviation — preset found but the hardware drifts on at
+ // least one field (PF count, PCI addresses, device IDs).
+ // Deviations are informational: the deployment can still run
+ // correctly against drifted hardware, just not against the
+ // certified preset.
+ StatusDeviation Status = "deviation"
+ // StatusNotFound — no preset matches the (machineType, gpuType)
+ // pair, or the catalog lookup itself failed. Most common reason:
+ // the pair hasn't been catalogued yet. A group whose machineType
+ // or gpuType was never discovered is reported as StatusSkipped
+ // instead.
+ StatusNotFound Status = "not-found"
+ // StatusSkipped — the lookup couldn't even be attempted (e.g.
+ // machineType or gpuType empty on the group). Distinct from
+ // StatusNotFound so callers can render "discovery is
+ // incomplete" rather than "no preset for this hardware."
+ StatusSkipped Status = "skipped"
+)
+
+// Result describes one group's preset-lookup outcome. Surfaced
+// verbatim by validate's text/JSON output and the HTML report. The
+// Deviations slice mirrors config.PresetDeviationEntry so callers
+// that already render the existing presetDeviation field keep
+// working without translation.
+type Result struct {
+ Group string
+ MachineType string
+ GPUType string
+ // Manufacturer is propagated from the matched preset's
+ // topology.yaml when a preset was found (StatusMatch /
+ // StatusDeviation). Empty otherwise. Surfaced in the user-
+ // facing "server type" label as the leading segment of
+ // <manufacturer>-<machineType>-<gpuType>.
+ Manufacturer string
+ Status Status
+ // PresetName is the catalog directory name (what `l8k preset
+ // list` prints) when a preset was found. Empty otherwise.
+ PresetName string
+ // Reason carries a short human-readable explanation when
+ // Status is StatusNotFound or StatusSkipped, and a one-line
+ // summary when StatusDeviation ("3 deviation(s) — pfCount,
+ // pciAddress, deviceID"). Empty when StatusMatch.
+ Reason string
+ Deviations []config.PresetDeviationEntry
+ // Preset is the loaded topology that produced the match. Used
+ // downstream to enrich "missing PCI" rows in the validation
+ // report with the expected deviceID / rail / netdev when the
+ // cluster doesn't have a device the certified topology
+ // expects. nil when Status is NotFound or Skipped.
+ Preset *presets.Topology
+}
+
+// MatchGroup runs the preset lookup + comparison for one group.
+// Does not mutate the group; callers that also want to enrich
+// rail/NUMA topology fields invoke presets.ApplyPreset separately.
+//
+// Lookup is exact-match on (machineType, gpuType). Empty fields
+// short-circuit to StatusSkipped — without those values the preset
+// catalog can't be queried, and we don't fall back to fuzzy matching
+// because picking the wrong preset would silently rewrite the
+// deployment to target the wrong hardware shape.
+func MatchGroup(group config.ClusterConfig) Result {
+ res := Result{
+ Group: group.Identifier,
+ MachineType: group.MachineType,
+ GPUType: group.GPUType,
+ }
+ if group.MachineType == "" || group.GPUType == "" {
+ res.Status = StatusSkipped
+ switch {
+ case group.MachineType == "" && group.GPUType == "":
+ res.Reason = "machineType and gpuType not discovered on group — `l8k discover` did not populate them"
+ case group.MachineType == "":
+ res.Reason = "machineType not discovered on group"
+ default:
+ res.Reason = "gpuType not discovered on group"
+ }
+ return res
+ }
+ preset, err := presets.LoadPreset(group.MachineType, group.GPUType)
+ if err != nil {
+ res.Status = StatusNotFound
+ res.Reason = fmt.Sprintf("preset lookup failed: %v", err)
+ return res
+ }
+ if preset == nil {
+ res.Status = StatusNotFound
+ res.Reason = fmt.Sprintf("no preset matches (%s, %s) in the local presets directory", group.MachineType, group.GPUType)
+ return res
+ }
+ deviations := presets.ValidatePreset(preset, group.PFs)
+ res.PresetName = preset.MachineType + "/" + preset.GPUType
+ res.Manufacturer = preset.Manufacturer
+ res.Deviations = deviations
+ res.Preset = preset
+ if len(deviations) == 0 {
+ res.Status = StatusMatch
+ return res
+ }
+ res.Status = StatusDeviation
+ res.Reason = fmt.Sprintf("%d deviation(s) from matched preset", len(deviations))
+ return res
+}
+
+// MatchAll runs MatchGroup over every entry in cfg.ClusterConfig and
+// returns the results in the same order. cfg is never mutated.
+func MatchAll(cfg *config.LaunchKubernetesConfig) []Result {
+ if cfg == nil {
+ return nil
+ }
+ out := make([]Result, 0, len(cfg.ClusterConfig))
+ for _, g := range cfg.ClusterConfig {
+ out = append(out, MatchGroup(g))
+ }
+ return out
+}
+
+// AnyMatched reports whether any of the results found a preset —
+// either an exact match or a match with deviations. Used by the
+// validate CLI to decide whether the "preset" check is worth surfacing
+// at all (vs. hidden when no presets exist for the cluster's hardware).
+func AnyMatched(results []Result) bool {
+ for _, r := range results {
+ if r.Status == StatusMatch || r.Status == StatusDeviation {
+ return true
+ }
+ }
+ return false
+}
diff --git a/pkg/presetmatch/presetmatch_test.go b/pkg/presetmatch/presetmatch_test.go
new file mode 100644
index 0000000..0e31559
--- /dev/null
+++ b/pkg/presetmatch/presetmatch_test.go
@@ -0,0 +1,85 @@
+// Copyright 2026 NVIDIA CORPORATION & AFFILIATES
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package presetmatch
+
+import (
+ "testing"
+
+ "github.com/nvidia/k8s-launch-kit/pkg/config"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestMatchGroup_SkippedWhenMachineOrGPUMissing(t *testing.T) {
+ t.Run("both empty", func(t *testing.T) {
+ r := MatchGroup(config.ClusterConfig{Identifier: "g"})
+ assert.Equal(t, StatusSkipped, r.Status)
+ assert.Contains(t, r.Reason, "machineType and gpuType not discovered")
+ })
+ t.Run("machineType only", func(t *testing.T) {
+ r := MatchGroup(config.ClusterConfig{Identifier: "g", MachineType: "vendor-a-h200"})
+ assert.Equal(t, StatusSkipped, r.Status)
+ assert.Contains(t, r.Reason, "gpuType not discovered")
+ })
+ t.Run("gpuType only", func(t *testing.T) {
+ r := MatchGroup(config.ClusterConfig{Identifier: "g", GPUType: "h200"})
+ assert.Equal(t, StatusSkipped, r.Status)
+ assert.Contains(t, r.Reason, "machineType not discovered")
+ })
+}
+
+// The preset catalog can be absent in the test environment (it's
+// installed under /usr/local/share/l8k or similar). When that's the
+// case LoadPreset returns "not found" for any pair — exercising the
+// StatusNotFound path. The other Status outcomes need real preset
+// fixtures and are covered by the higher-level integration tests in
+// pkg/presets and pkg/networkoperatorplugin.
+func TestMatchGroup_NotFoundWhenNoCatalog(t *testing.T) {
+ r := MatchGroup(config.ClusterConfig{
+ Identifier: "g",
+ MachineType: "fictional-machine-no-such-preset",
+ GPUType: "h200",
+ })
+ // Whether the catalog exists but has no matching entry, or there
+ // is no catalog at all, MatchGroup reports StatusNotFound — so a
+ // single assertion covers both cases.
+ assert.Equal(t, StatusNotFound, r.Status)
+ assert.NotEmpty(t, r.Reason)
+}
+
+func TestMatchAll(t *testing.T) {
+ cfg := &config.LaunchKubernetesConfig{
+ ClusterConfig: []config.ClusterConfig{
+ {Identifier: "a"},
+ {Identifier: "b", MachineType: "vendor-x", GPUType: "h200"},
+ },
+ }
+ results := MatchAll(cfg)
+ assert.Len(t, results, 2)
+ assert.Equal(t, "a", results[0].Group)
+ assert.Equal(t, StatusSkipped, results[0].Status)
+ assert.Equal(t, "b", results[1].Group)
+ // StatusNotFound when no preset catalog; never panics either way.
+ assert.NotEqual(t, StatusSkipped, results[1].Status)
+}
+
+func TestAnyMatched(t *testing.T) {
+ assert.False(t, AnyMatched(nil))
+ assert.False(t, AnyMatched([]Result{{Status: StatusSkipped}, {Status: StatusNotFound}}))
+ assert.True(t, AnyMatched([]Result{{Status: StatusMatch}}))
+ assert.True(t, AnyMatched([]Result{{Status: StatusDeviation}}))
+ assert.True(t, AnyMatched([]Result{{Status: StatusSkipped}, {Status: StatusMatch}}))
+}
diff --git a/profiles/host-device-rdma/40-example-daemonset.yaml b/profiles/host-device-rdma/40-example-daemonset.yaml
index 904a658..ad13a26 100644
--- a/profiles/host-device-rdma/40-example-daemonset.yaml
+++ b/profiles/host-device-rdma/40-example-daemonset.yaml
@@ -44,7 +44,7 @@ spec:
{{- end }}
containers:
- name: test-container
- image: mellanox/rping-test
+ image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
command: ["/bin/bash", "-c", "sleep infinity"]
securityContext:
capabilities:
@@ -107,7 +107,7 @@ spec:
{{- end }}
containers:
- name: test-container
- image: mellanox/rping-test
+ image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
command: ["/bin/bash", "-c", "sleep infinity"]
securityContext:
capabilities:
diff --git a/profiles/ipoib-rdma-shared/40-example-daemonset.yaml b/profiles/ipoib-rdma-shared/40-example-daemonset.yaml
index 29738ee..43448fe 100644
--- a/profiles/ipoib-rdma-shared/40-example-daemonset.yaml
+++ b/profiles/ipoib-rdma-shared/40-example-daemonset.yaml
@@ -44,7 +44,7 @@ spec:
{{- end }}
containers:
- name: test-container
- image: mellanox/rping-test
+ image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
command: ["/bin/bash", "-c", "sleep infinity"]
securityContext:
capabilities:
@@ -108,7 +108,7 @@ spec:
{{- end }}
containers:
- name: test-container
- image: mellanox/rping-test
+ image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
command: ["/bin/bash", "-c", "sleep infinity"]
securityContext:
capabilities:
diff --git a/profiles/macvlan-rdma-shared/40-example-daemonset.yaml b/profiles/macvlan-rdma-shared/40-example-daemonset.yaml
index 3e1aeea..b016505 100644
--- a/profiles/macvlan-rdma-shared/40-example-daemonset.yaml
+++ b/profiles/macvlan-rdma-shared/40-example-daemonset.yaml
@@ -44,7 +44,7 @@ spec:
{{- end }}
containers:
- name: test-container
- image: mellanox/rping-test
+ image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
command: ["/bin/bash", "-c", "sleep infinity"]
securityContext:
capabilities:
@@ -108,7 +108,7 @@ spec:
{{- end }}
containers:
- name: test-container
- image: mellanox/rping-test
+ image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
command: ["/bin/bash", "-c", "sleep infinity"]
securityContext:
capabilities:
diff --git a/profiles/spectrum-x-ra2.1/90-example-daemonset.yaml b/profiles/spectrum-x-ra2.1/90-example-daemonset.yaml
index 5cf601a..9afcaff 100644
--- a/profiles/spectrum-x-ra2.1/90-example-daemonset.yaml
+++ b/profiles/spectrum-x-ra2.1/90-example-daemonset.yaml
@@ -46,7 +46,8 @@ spec:
{{- end }}
containers:
- name: spectrum-x-test
- image: nvcr.io/nvidia/mellanox/rping-test:latest
+ image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
+ command: ["/bin/bash", "-c", "sleep infinity"]
securityContext:
capabilities:
add: ["IPC_LOCK", "NET_RAW"]
diff --git a/profiles/spectrum-x/90-example-daemonset.yaml b/profiles/spectrum-x/90-example-daemonset.yaml
index 5cf601a..9afcaff 100644
--- a/profiles/spectrum-x/90-example-daemonset.yaml
+++ b/profiles/spectrum-x/90-example-daemonset.yaml
@@ -46,7 +46,8 @@ spec:
{{- end }}
containers:
- name: spectrum-x-test
- image: nvcr.io/nvidia/mellanox/rping-test:latest
+ image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
+ command: ["/bin/bash", "-c", "sleep infinity"]
securityContext:
capabilities:
add: ["IPC_LOCK", "NET_RAW"]
diff --git a/profiles/sriov-ethernet-rdma/60-example-daemonset.yaml b/profiles/sriov-ethernet-rdma/60-example-daemonset.yaml
index abffc47..8a60e87 100644
--- a/profiles/sriov-ethernet-rdma/60-example-daemonset.yaml
+++ b/profiles/sriov-ethernet-rdma/60-example-daemonset.yaml
@@ -44,7 +44,7 @@ spec:
{{- end }}
containers:
- name: test-container
- image: mellanox/rping-test
+ image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
command: ["/bin/bash", "-c", "sleep infinity"]
securityContext:
capabilities:
@@ -107,7 +107,7 @@ spec:
{{- end }}
containers:
- name: test-container
- image: mellanox/rping-test
+ image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
command: ["/bin/bash", "-c", "sleep infinity"]
securityContext:
capabilities:
diff --git a/profiles/sriov-ib-rdma/60-example-daemonset.yaml b/profiles/sriov-ib-rdma/60-example-daemonset.yaml
index 7d56b5c..b14bfd7 100644
--- a/profiles/sriov-ib-rdma/60-example-daemonset.yaml
+++ b/profiles/sriov-ib-rdma/60-example-daemonset.yaml
@@ -44,7 +44,7 @@ spec:
{{- end }}
containers:
- name: test-container
- image: mellanox/rping-test
+ image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
command: ["/bin/bash", "-c", "sleep infinity"]
securityContext:
capabilities:
@@ -107,7 +107,7 @@ spec:
{{- end }}
containers:
- name: test-container
- image: mellanox/rping-test
+ image: nvcr.io/nvidia/doca/doca:3.3.0-full-rt-host
command: ["/bin/bash", "-c", "sleep infinity"]
securityContext:
capabilities: