Skip to content

Commit ff9a259

Browse files
committed
Fix ResourceFlavor immutability caused by TAS opt-in change
The commit c23eccf moved kaiwo/worker=true outside the if/else in ConvertKaiwoToKueueResourceFlavor, causing spec drift on existing ResourceFlavors that had topologyName set. Since Kueue makes ResourceFlavor specs immutable once topologyName is set, the controller entered an infinite update-rejection loop. The flavor was also stuck deleting due to Kueue's resource-in-use finalizer. Fixes: - Restore kaiwo/worker label to only apply to flavors without topology, preventing spec drift on immutable ResourceFlavors - Keep topologyName on auto-generated ResourceFlavors (enables TAS capability); TAS is opt-in at the workload level via preferredTopologyLabel/requiredTopologyLabel annotations - Add replaceResourceFlavor helper in syncResourceFlavors that attempts in-place update, falls back to delete-and-recreate only on immutability errors (Invalid/Forbidden), skips terminating objects, and accepts eventual convergence across reconciliation cycles - Reorder SyncKueueResources to sync topologies before resource flavors - Update stale comments on PreferredTopologyLabel and DefaultTopologyName - Add chainsaw tests for TAS flavor labels, topology/non-topology flavor behavior, mutable flavor updates, and topology migration path
1 parent dac49fd commit ff9a259

9 files changed

Lines changed: 452 additions & 14 deletions

File tree

apis/config/v1alpha1/kaiwoconfig_types.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ type KaiwoConfigSpec struct {
7878
DynamicallyUpdateDefaultClusterQueue bool `json:"dynamicallyUpdateDefaultClusterQueue,omitempty"`
7979

8080
// DefaultTopologyName is the name of the default Kueue Topology used for Topology Aware Scheduling.
81-
// Auto-generated ResourceFlavors reference this topology when DynamicallyUpdateDefaultClusterQueue is enabled.
81+
// Auto-generated ResourceFlavors reference this topology to enable TAS capability.
82+
// Workloads opt in to TAS by setting preferredTopologyLabel or requiredTopologyLabel.
8283
// +kubebuilder:default="default-topology"
8384
DefaultTopologyName string `json:"defaultTopologyName,omitempty"`
8485
}

apis/kaiwo/v1alpha1/common_types.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,10 +124,11 @@ type CommonMetaSpec struct {
124124
// Duration specifies the maximum duration over which the workload can run. This is useful for avoiding workloads running indefinitely.
125125
Duration *metav1.Duration `json:"duration,omitempty"`
126126

127-
// PreferredTopologyLabel specifies the preferred topology label for scheduling the workload. This is used to influence how the workload is distributed across nodes in the cluster.
128-
// If not specified, Kaiwo will use the default topology labels defined in the default topology of KaiwoQueueConfig starting at the host level.
127+
// PreferredTopologyLabel specifies the preferred topology label for scheduling the workload (opt-in).
128+
// When set, Kueue's Topology Aware Scheduling is activated for this workload.
129129
// The levels are evaluated one-by-one going up from the level indicated by the label. If the PodSet cannot fit within a given topology label then the next topology level up is considered.
130-
// If the PodSet cannot fit at the highest topology level, then it is distributed among multiple topology domains
130+
// If the PodSet cannot fit at the highest topology level, then it is distributed among multiple topology domains.
131+
// If not specified, no topology-aware scheduling is applied.
131132
PreferredTopologyLabel string `json:"preferredTopologyLabel,omitempty"`
132133

133134
// RequiredTopologyLabel specifies the required topology label for scheduling the workload. This is used to ensure that the workload is scheduled on nodes that match the specified topology label.

internal/controller/kaiwoqueueconfig_controller.go

Lines changed: 62 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -209,11 +209,11 @@ func (r *KaiwoQueueConfigReconciler) SyncKueueResources(ctx context.Context, que
209209
return err
210210
}
211211

212-
success := r.syncResourceFlavors(ctx, queueConfig, existingFlavors)
212+
success := r.syncTopologies(ctx, queueConfig, existingTopologies)
213+
success = r.syncResourceFlavors(ctx, queueConfig, existingFlavors) && success
213214
success = r.syncClusterQueues(ctx, queueConfig, existingQueues) && success
214215
success = r.syncLocalQueues(ctx, queueConfig, existingLocalQueues) && success
215216
success = r.syncWorkloadPriorityClasses(ctx, queueConfig, existingPriorityClasses) && success
216-
success = r.syncTopologies(ctx, queueConfig, existingTopologies) && success
217217

218218
if success {
219219
logger.Info("Successfully synced all Kueue resources")
@@ -247,15 +247,9 @@ func (r *KaiwoQueueConfigReconciler) syncResourceFlavors(ctx context.Context, qu
247247
r.emitEvent(queueConfig, "resource flavor", "create", &kueueFlavor, err)
248248

249249
} else if !controllerutils.CompareResourceFlavors(existingFlavor, kueueFlavor) {
250-
logger.Info("Updating ResourceFlavor", "name", kueueFlavor.Name)
251-
existingFlavor.Spec = kueueFlavor.Spec
252-
err := r.Update(ctx, &existingFlavor)
253-
if err != nil {
254-
logger.Error(err, "Failed to update ResourceFlavor", "name", kueueFlavor.Name)
250+
if !r.replaceResourceFlavor(ctx, queueConfig, existingFlavor, kueueFlavor) {
255251
success = false
256252
}
257-
r.emitEvent(queueConfig, "resource flavor", "update", &existingFlavor, err)
258-
259253
}
260254
existingFlavorMap[kueueFlavor.Name] = kueueFlavor
261255
}
@@ -275,6 +269,65 @@ func (r *KaiwoQueueConfigReconciler) syncResourceFlavors(ctx context.Context, qu
275269
return success
276270
}
277271

272+
// replaceResourceFlavor attempts an in-place update of a ResourceFlavor.
273+
// If the update is rejected due to immutability (Kueue makes specs immutable
274+
// when topologyName is set), it falls back to delete-and-recreate. The recreate
275+
// may not succeed in a single cycle if Kueue's resource-in-use finalizer is
276+
// present; in that case the next reconciliation will pick it up.
277+
func (r *KaiwoQueueConfigReconciler) replaceResourceFlavor(
278+
ctx context.Context,
279+
queueConfig *kaiwo.KaiwoQueueConfig,
280+
existing kueuev1beta1.ResourceFlavor,
281+
desired kueuev1beta1.ResourceFlavor,
282+
) bool {
283+
logger := log.FromContext(ctx)
284+
285+
if existing.DeletionTimestamp != nil {
286+
logger.Info("ResourceFlavor is terminating, will converge on a future cycle", "name", existing.Name)
287+
return true
288+
}
289+
290+
logger.Info("Updating ResourceFlavor", "name", desired.Name)
291+
existing.Spec = desired.Spec
292+
updateErr := r.Update(ctx, &existing)
293+
if updateErr == nil {
294+
r.emitEvent(queueConfig, "resource flavor", "update", &existing, nil)
295+
return true
296+
}
297+
298+
if !errors.IsInvalid(updateErr) && !errors.IsForbidden(updateErr) {
299+
logger.Error(updateErr, "Failed to update ResourceFlavor", "name", desired.Name)
300+
r.emitEvent(queueConfig, "resource flavor", "update", &existing, updateErr)
301+
return false
302+
}
303+
304+
logger.Info("ResourceFlavor spec is immutable, replacing via delete and recreate", "name", desired.Name)
305+
if err := r.Delete(ctx, &existing); err != nil {
306+
logger.Error(err, "Failed to delete immutable ResourceFlavor", "name", desired.Name)
307+
r.emitEvent(queueConfig, "resource flavor", "delete", &existing, err)
308+
return false
309+
}
310+
311+
if err := ctrl.SetControllerReference(queueConfig, &desired, r.Scheme); err != nil {
312+
logger.Error(err, "Failed to set owner reference on replacement ResourceFlavor", "name", desired.Name)
313+
r.emitEvent(queueConfig, "resource flavor", "owner reference", &desired, err)
314+
return false
315+
}
316+
317+
if err := r.Create(ctx, &desired); err != nil {
318+
// The old object may still be terminating (blocked by Kueue's resource-in-use
319+
// finalizer). This is expected; the next reconciliation will create it once
320+
// the old object is fully removed.
321+
logger.Info("Replacement create pending, old ResourceFlavor still terminating", "name", desired.Name, "error", err)
322+
r.emitEvent(queueConfig, "resource flavor", "recreate", &desired, err)
323+
return true
324+
}
325+
326+
logger.Info("Successfully replaced ResourceFlavor", "name", desired.Name)
327+
r.emitEvent(queueConfig, "resource flavor", "recreate", &desired, nil)
328+
return true
329+
}
330+
278331
func (r *KaiwoQueueConfigReconciler) syncTopologies(
279332
ctx context.Context,
280333
queueConfig *kaiwo.KaiwoQueueConfig,

internal/controller/utils/kueue.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,8 +385,9 @@ func ConvertKaiwoToKueueResourceFlavor(kaiwoFlavor kaiwo.ResourceFlavorSpec) kue
385385
if kaiwoFlavor.TopologyName != "" {
386386
ref := kueuev1beta1.TopologyReference(kaiwoFlavor.TopologyName)
387387
topologyRef = &ref
388+
} else {
389+
nodeLabels[common.DefaultKaiwoWorkerLabel] = "true"
388390
}
389-
nodeLabels[common.DefaultKaiwoWorkerLabel] = "true"
390391

391392
return kueuev1beta1.ResourceFlavor{
392393
ObjectMeta: metav1.ObjectMeta{Name: kaiwoFlavor.Name},
Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
apiVersion: chainsaw.kyverno.io/v1alpha1
2+
kind: Test
3+
metadata:
4+
name: auto-generated-flavors-topology-and-labels
5+
spec:
6+
concurrent: false
7+
steps:
8+
- name: Verify auto-generated flavors have topology but no kaiwo/worker in nodeLabels
9+
description: >
10+
Auto-generated ResourceFlavors should reference the default topology
11+
(enabling TAS capability) but should NOT have kaiwo/worker in their
12+
nodeLabels (Kueue makes specs immutable when topologyName is set, so
13+
the nodeLabels must not change after creation).
14+
try:
15+
- script:
16+
content: |
17+
kubectl get kaiwoqueueconfig kaiwo -o json
18+
outputs:
19+
- name: resourceFlavors
20+
value: (to_string(json_parse($stdout).spec.resourceFlavors))
21+
- script:
22+
env:
23+
- name: flavors
24+
value: ($resourceFlavors)
25+
content: |
26+
echo $flavors
27+
check:
28+
# Auto-generated flavors must have topologyName (enables TAS opt-in at workload level)
29+
(contains($stdout, 'topologyName')): true
30+
# Auto-generated flavors must NOT have kaiwo/worker in their spec (would cause
31+
# immutable spec drift since topologyName is set)
32+
(contains($stdout, '"kaiwo/worker"')): false
33+
34+
- name: Verify Kueue ResourceFlavors have topology but no kaiwo/worker
35+
try:
36+
- script:
37+
content: kubectl get resourceflavors -o json
38+
outputs:
39+
- name: rfItems
40+
value: (to_string(json_parse($stdout).items))
41+
- script:
42+
env:
43+
- name: items
44+
value: ($rfItems)
45+
content: |
46+
echo $items
47+
check:
48+
# Kueue ResourceFlavors should have topologyName set
49+
(contains($stdout, '"topologyName"')): true
50+
# Kueue ResourceFlavors with topology should NOT have kaiwo/worker label
51+
(contains($stdout, '"kaiwo/worker"')): false
52+
catch:
53+
- command:
54+
entrypoint: kaiwo-dev
55+
env:
56+
- name: NAMESPACE
57+
value: ($namespace)
58+
- name: PRINT_LEVEL
59+
value: ($values.print_level)
60+
args: ["debug", "chainsaw", "--namespace=$NAMESPACE", "--print-level=$PRINT_LEVEL"]
61+
---
62+
apiVersion: chainsaw.kyverno.io/v1alpha1
63+
kind: Test
64+
metadata:
65+
name: topology-flavor-labels
66+
spec:
67+
concurrent: false
68+
timeouts:
69+
cleanup: 60s
70+
steps:
71+
- name: Apply config with topology and non-topology flavors
72+
try:
73+
- apply:
74+
file: kaiwoqueueconfig-with-topology.yaml
75+
- script:
76+
content: kubectl wait --for=create resourceflavor/no-topo-flavor --timeout 30s
77+
- script:
78+
content: kubectl wait --for=create resourceflavor/topo-flavor --timeout 30s
79+
80+
- name: Verify non-topology flavor has kaiwo/worker label
81+
try:
82+
- script:
83+
content: kubectl get resourceflavor no-topo-flavor -o json
84+
outputs:
85+
- name: noTopoSpec
86+
value: (to_string(json_parse($stdout).spec))
87+
- script:
88+
env:
89+
- name: spec
90+
value: ($noTopoSpec)
91+
content: |
92+
echo $spec
93+
check:
94+
(contains($stdout, '"kaiwo/worker":"true"')): true
95+
(contains($stdout, '"topologyName"')): false
96+
97+
- name: Verify topology flavor does NOT have kaiwo/worker label but has topologyName
98+
try:
99+
- script:
100+
content: kubectl get resourceflavor topo-flavor -o json
101+
outputs:
102+
- name: topoSpec
103+
value: (to_string(json_parse($stdout).spec))
104+
- script:
105+
env:
106+
- name: spec
107+
value: ($topoSpec)
108+
content: |
109+
echo $spec
110+
check:
111+
(contains($stdout, '"kaiwo/worker"')): false
112+
(contains($stdout, '"topologyName":"test-topo"')): true
113+
catch:
114+
- command:
115+
entrypoint: kaiwo-dev
116+
env:
117+
- name: NAMESPACE
118+
value: ($namespace)
119+
- name: PRINT_LEVEL
120+
value: ($values.print_level)
121+
args: ["debug", "chainsaw", "--namespace=$NAMESPACE", "--print-level=$PRINT_LEVEL"]
122+
finally:
123+
- script:
124+
content: kubectl delete kaiwoqueueconfig kaiwo --timeout 30s
125+
---
126+
apiVersion: chainsaw.kyverno.io/v1alpha1
127+
kind: Test
128+
metadata:
129+
name: flavor-spec-update-mutable
130+
spec:
131+
concurrent: false
132+
timeouts:
133+
cleanup: 60s
134+
steps:
135+
- name: Apply initial config with a mutable flavor (no topology)
136+
try:
137+
- apply:
138+
file: kaiwoqueueconfig-initial.yaml
139+
- script:
140+
content: kubectl wait --for=create resourceflavor/update-test-flavor --timeout 30s
141+
- script:
142+
content: kubectl get resourceflavor update-test-flavor -o json
143+
outputs:
144+
- name: initialSpec
145+
value: (to_string(json_parse($stdout).spec))
146+
- script:
147+
env:
148+
- name: spec
149+
value: ($initialSpec)
150+
content: |
151+
echo $spec
152+
check:
153+
(contains($stdout, '"kaiwo/nodepool":"update-test-initial"')): true
154+
155+
- name: Update nodeLabels on mutable flavor and verify the change takes effect
156+
try:
157+
- apply:
158+
file: kaiwoqueueconfig-updated.yaml
159+
- sleep:
160+
duration: 5s
161+
- script:
162+
timeout: 30s
163+
content: |
164+
for i in $(seq 1 15); do
165+
spec=$(kubectl get resourceflavor update-test-flavor -o jsonpath='{.spec.nodeLabels.kaiwo\/nodepool}' 2>/dev/null)
166+
if [ "$spec" = "update-test-updated" ]; then
167+
echo "Flavor nodeLabel updated successfully"
168+
exit 0
169+
fi
170+
sleep 2
171+
done
172+
echo "Flavor nodeLabel was not updated in time"
173+
exit 1
174+
check:
175+
($error == null): true
176+
catch:
177+
- command:
178+
entrypoint: kaiwo-dev
179+
env:
180+
- name: NAMESPACE
181+
value: ($namespace)
182+
- name: PRINT_LEVEL
183+
value: ($values.print_level)
184+
args: ["debug", "chainsaw", "--namespace=$NAMESPACE", "--print-level=$PRINT_LEVEL"]
185+
finally:
186+
- script:
187+
content: kubectl delete kaiwoqueueconfig kaiwo --timeout 30s
188+
---
189+
apiVersion: chainsaw.kyverno.io/v1alpha1
190+
kind: Test
191+
metadata:
192+
name: flavor-topology-migration
193+
spec:
194+
description: >
195+
Tests the migration path: a flavor starts with topologyName set (making
196+
its Kueue spec immutable), then the config is updated to remove topology.
197+
The controller must delete-and-recreate the ResourceFlavor to converge.
198+
concurrent: false
199+
timeouts:
200+
cleanup: 120s
201+
steps:
202+
- name: Apply config with a topology-enabled flavor
203+
try:
204+
- apply:
205+
file: kaiwoqueueconfig-with-topology.yaml
206+
- script:
207+
content: kubectl wait --for=create resourceflavor/topo-flavor --timeout 30s
208+
- script:
209+
content: kubectl get resourceflavor topo-flavor -o json
210+
outputs:
211+
- name: initialSpec
212+
value: (to_string(json_parse($stdout).spec))
213+
- script:
214+
env:
215+
- name: spec
216+
value: ($initialSpec)
217+
content: |
218+
echo $spec
219+
check:
220+
(contains($stdout, '"topologyName":"test-topo"')): true
221+
222+
- name: Remove topology from flavor and verify convergence
223+
try:
224+
- apply:
225+
file: kaiwoqueueconfig-migrated.yaml
226+
- script:
227+
timeout: 60s
228+
content: |
229+
for i in $(seq 1 30); do
230+
topo=$(kubectl get resourceflavor topo-flavor -o jsonpath='{.spec.topologyName}' 2>/dev/null)
231+
worker=$(kubectl get resourceflavor topo-flavor -o jsonpath='{.spec.nodeLabels.kaiwo\/worker}' 2>/dev/null)
232+
deleting=$(kubectl get resourceflavor topo-flavor -o jsonpath='{.metadata.deletionTimestamp}' 2>/dev/null)
233+
if [ -z "$topo" ] && [ "$worker" = "true" ] && [ -z "$deleting" ]; then
234+
echo "Flavor migrated: topology removed, worker label present, not terminating"
235+
exit 0
236+
fi
237+
echo "Waiting for convergence (attempt $i): topologyName=$topo worker=$worker deleting=$deleting"
238+
sleep 2
239+
done
240+
echo "Flavor did not converge in time"
241+
exit 1
242+
check:
243+
($error == null): true
244+
catch:
245+
- command:
246+
entrypoint: kaiwo-dev
247+
env:
248+
- name: NAMESPACE
249+
value: ($namespace)
250+
- name: PRINT_LEVEL
251+
value: ($values.print_level)
252+
args: ["debug", "chainsaw", "--namespace=$NAMESPACE", "--print-level=$PRINT_LEVEL"]
253+
finally:
254+
- script:
255+
content: kubectl delete kaiwoqueueconfig kaiwo --timeout 30s

0 commit comments

Comments
 (0)