From 38506b89c9ecb6aa9676e4fb2eb27d6afb58ed9f Mon Sep 17 00:00:00 2001 From: be0x74a Date: Fri, 8 May 2026 16:31:29 +0200 Subject: [PATCH] fix(bench): wait for Terminating namespace before recreating Multi-profile runs (--profile=full, the release-bench workflow) reuse the same namespace names across profiles. The deferred teardown of profile N issues async ns Deletes and returns immediately; profile N+1 then calls ensureNamespace, whose Get succeeded on the still-Terminating namespace, so it skipped Create. The next CR Create in profile N+1 then raced the finalizer and intermittently saw "namespaces 'bench-src-0' not found" once the ns dropped from etcd. Surfaced on the very first end-to-end --profile=full run on the self-hosted runner: np-typical succeeded, np-stress's bootstrap hit the race on bench-src-0. Fix: ensureNamespace now polls until it sees either NotFound (safe to create) or a non-Terminating phase (reuse), with a 60s deadline. Single point of change; teardown stays best-effort and non-blocking. The unit-test surface for this is awkward (requires either a fake client that returns Terminating-then-NotFound or an envtest cluster); deferring that. The bench-smoke check covers the np-typical happy path; this race only surfaces in multi-profile sequencing, which the release-bench full matrix exercises end-to-end. 
--- test/bench/bootstrap.go | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/test/bench/bootstrap.go b/test/bench/bootstrap.go index 205a672..b910e58 100644 --- a/test/bench/bootstrap.go +++ b/test/bench/bootstrap.go @@ -5,6 +5,7 @@ import ( "fmt" "time" + corev1 "k8s.io/api/core/v1" apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" apiextclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -142,14 +143,34 @@ func installCRDs(ctx context.Context, c *clients, nGVKs int) error { func ptr[T any](v T) *T { return &v } // ensureNamespace creates a namespace (with optional extra labels) if it -// doesn't already exist. Idempotent. +// doesn't already exist, or waits for a previous Terminating namespace to +// finish deletion before re-creating it. Idempotent. +// +// The Terminating wait avoids a race that surfaces when sequential bench +// profiles reuse the same namespace names: profile N's deferred teardown +// issues an async ns Delete and returns immediately; profile N+1's +// bootstrap then calls Get on the same name → succeeds because the ns is +// still in Terminating phase → skips Create → microseconds later the ns +// finalizer completes and the namespace drops from etcd → the next CR +// Create in profile N+1 fails with "namespaces 'bench-src-0' not found". +// Surfaced on the first end-to-end --profile=full run. 
func ensureNamespace(ctx context.Context, c *clients, name string, extraLabels map[string]string) error { - _, err := c.kube.CoreV1().Namespaces().Get(ctx, name, metav1.GetOptions{}) - if err == nil { - return nil - } - if !apierrors.IsNotFound(err) { - return err + deadline := time.Now().Add(60 * time.Second) + for { + ns, err := c.kube.CoreV1().Namespaces().Get(ctx, name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + break // gone — safe to create fresh + } + if err != nil { + return err + } + if ns.Status.Phase != corev1.NamespaceTerminating { + return nil // already Active — reuse as-is + } + if time.Now().After(deadline) { + return fmt.Errorf("namespace %s stuck in Terminating phase after 60s", name) + } + time.Sleep(500 * time.Millisecond) } labels := map[string]interface{}{"bench": "true"} for k, v := range extraLabels { @@ -162,7 +183,7 @@ func ensureNamespace(ctx context.Context, c *clients, name string, extraLabels m "labels": labels, }, }} - _, err = c.dynamic.Resource(nsGVR).Create(ctx, u, metav1.CreateOptions{}) + _, err := c.dynamic.Resource(nsGVR).Create(ctx, u, metav1.CreateOptions{}) return err }