diff --git a/demos/cuj1-eks.md b/demos/cuj1-eks.md
index eda8f9481..2fd5b4e41 100644
--- a/demos/cuj1-eks.md
+++ b/demos/cuj1-eks.md
@@ -97,12 +97,30 @@ spec:
         nvidia.com/gpu: 1
       limits:
         nvidia.com/gpu: 1
-  podTemplateOverrides:
-    - targetJobs:
-        - name: node
-      spec:
-        tolerations:
-          - operator: Exists
+  # Inject AICR-standard GPU node scheduling. kubeflow-trainer v2.2.0 replaced
+  # podTemplateOverrides with the runtimePatches API (PR kubeflow/trainer#3309).
+  runtimePatches:
+    - manager: aicr.nvidia.com/demo
+      trainingRuntimeSpec:
+        template:
+          spec:
+            replicatedJobs:
+              - name: node
+                template:
+                  spec:
+                    template:
+                      spec:
+                        nodeSelector:
+                          nodeGroup: gpu-worker
+                        tolerations:
+                          - key: dedicated
+                            operator: Equal
+                            value: worker-workload
+                            effect: NoSchedule
+                          - key: dedicated
+                            operator: Equal
+                            value: worker-workload
+                            effect: NoExecute
   runtimeRef:
     name: torch-distributed
     apiGroup: trainer.kubeflow.org
diff --git a/demos/cuj1-gke.md b/demos/cuj1-gke.md
index 7673a9c34..98215b7b8 100644
--- a/demos/cuj1-gke.md
+++ b/demos/cuj1-gke.md
@@ -99,12 +99,32 @@ spec:
         nvidia.com/gpu: 1
       limits:
         nvidia.com/gpu: 1
-  podTemplateOverrides:
-    - targetJobs:
-        - name: node
-      spec:
-        tolerations:
-          - operator: Exists
+  # Inject GKE GPU node scheduling. Matches the snapshot/bundle/validate
+  # tolerations above (`dedicated=gpu-workload:NoSchedule` plus the GKE-managed
+  # `nvidia.com/gpu=present:NoSchedule` taint). kubeflow-trainer v2.2.0 replaced
+  # podTemplateOverrides with the runtimePatches API (PR kubeflow/trainer#3309).
+  runtimePatches:
+    - manager: aicr.nvidia.com/demo
+      trainingRuntimeSpec:
+        template:
+          spec:
+            replicatedJobs:
+              - name: node
+                template:
+                  spec:
+                    template:
+                      spec:
+                        nodeSelector:
+                          nodeGroup: gpu-worker
+                        tolerations:
+                          - key: dedicated
+                            operator: Equal
+                            value: gpu-workload
+                            effect: NoSchedule
+                          - key: nvidia.com/gpu
+                            operator: Equal
+                            value: present
+                            effect: NoSchedule
   runtimeRef:
     name: torch-distributed
     apiGroup: trainer.kubeflow.org
diff --git a/docs/contributor/component.md b/docs/contributor/component.md
index 4a61706e5..365d69cc6 100644
--- a/docs/contributor/component.md
+++ b/docs/contributor/component.md
@@ -430,7 +430,7 @@ Deployers respect the `deploymentOrder` field from the recipe to ensure componen
 ```yaml
 componentRefs:
   - name: cert-manager
-    version: v1.17.2
+    version: v1.20.2
   - name: gpu-operator
     version: v25.3.3
   - name: network-operator
diff --git a/docs/contributor/data.md b/docs/contributor/data.md
index 7acfa3f6f..a31ae0664 100644
--- a/docs/contributor/data.md
+++ b/docs/contributor/data.md
@@ -333,7 +333,7 @@ spec:
     - name: cert-manager
       type: Helm
       source: https://charts.jetstack.io
-      version: v1.17.2
+      version: v1.20.2
       valuesFile: components/cert-manager/values.yaml
 
     - name: gpu-operator
@@ -615,7 +615,7 @@ Components can declare dependencies via `dependencyRefs`:
 componentRefs:
   - name: cert-manager
     type: Helm
-    version: v1.17.2
+    version: v1.20.2
 
   - name: gpu-operator
     type: Helm
@@ -997,7 +997,7 @@ curl "http://localhost:8080/v1/recipe?os=ubuntu&service=eks&accelerator=gb200&in
       "name": "cert-manager",
       "type": "Helm",
       "source": "https://charts.jetstack.io",
-      "version": "v1.17.2",
+      "version": "v1.20.2",
       "valuesFile": "components/cert-manager/values.yaml"
     },
     {
diff --git a/docs/user/component-catalog.md b/docs/user/component-catalog.md
index 396a338bb..54035e229 100644
--- a/docs/user/component-catalog.md
+++ b/docs/user/component-catalog.md
@@ -24,7 +24,7 @@ The source of truth is [`recipes/registry.yaml`](https://github.com/NVIDIA/aicr/
 | **prometheus-adapter** | Exposes custom metrics from Prometheus to the Kubernetes metrics API. Enables HPA scaling based on GPU utilization and other custom metrics. | [prometheus-adapter](https://github.com/kubernetes-sigs/prometheus-adapter) |
 | **aws-ebs-csi-driver** | CSI driver for Amazon EBS volumes. Provides persistent storage for workloads on EKS. EKS-specific. | [AWS EBS CSI Driver](https://github.com/kubernetes-sigs/aws-ebs-csi-driver) |
 | **k8s-ephemeral-storage-metrics** | Exports ephemeral storage usage metrics per pod. Useful for monitoring scratch space consumption on GPU nodes. | [k8s-ephemeral-storage-metrics](https://github.com/jmcgrath207/k8s-ephemeral-storage-metrics) |
-| **kai-scheduler** | DRA-aware gang scheduler with hierarchical queues and topology-aware placement. Ensures distributed training jobs land on nodes with optimal interconnect topology. | [KAI Scheduler](https://github.com/NVIDIA/KAI-Scheduler) |
+| **kai-scheduler** | DRA-aware gang scheduler with hierarchical queues and topology-aware placement. Ensures distributed training jobs land on nodes with optimal interconnect topology. | [KAI Scheduler](https://github.com/kai-scheduler/KAI-Scheduler) |
 | **grove** | Pod lifecycle management for Dynamo inference platform. Installed as a standalone component. | [Grove](https://github.com/ai-dynamo/grove) |
 | **dynamo-platform** | NVIDIA Dynamo inference serving platform with bundled CRDs. Distributed inference with prefix-cache-aware routing and disaggregated prefill/decode. | [Dynamo](https://github.com/ai-dynamo/dynamo) |
 | **kgateway-crds** | Custom Resource Definitions for kgateway (Kubernetes Gateway API implementation). | [kgateway](https://github.com/kgateway-dev/kgateway) |
diff --git a/examples/recipes/aks-training.yaml b/examples/recipes/aks-training.yaml
index f3325670c..2c7aebaa8 100644
--- a/examples/recipes/aks-training.yaml
+++ b/examples/recipes/aks-training.yaml
@@ -76,8 +76,8 @@ componentRefs:
     namespace: kai-scheduler
     chart: kai-scheduler
     type: Helm
-    source: oci://ghcr.io/nvidia/kai-scheduler
-    version: v0.13.0
+    source: oci://ghcr.io/kai-scheduler/kai-scheduler
+    version: v0.14.1
     valuesFile: components/kai-scheduler/values.yaml
     dependencyRefs:
       - gpu-operator
diff --git a/pkg/bundler/deployer/helm/helm_test.go b/pkg/bundler/deployer/helm/helm_test.go
index 48c21acd4..4e16f87e7 100644
--- a/pkg/bundler/deployer/helm/helm_test.go
+++ b/pkg/bundler/deployer/helm/helm_test.go
@@ -1895,8 +1895,8 @@ func TestBundleGolden_KaiSchedulerPresent(t *testing.T) {
 					Name:      "kai-scheduler",
 					Namespace: "kai-scheduler",
 					Chart:     "kai-scheduler",
-					Version:   "v0.13.0",
-					Source:    "oci://ghcr.io/nvidia/kai-scheduler",
+					Version:   "v0.14.1",
+					Source:    "oci://ghcr.io/kai-scheduler/kai-scheduler",
 				},
 			},
 			DeploymentOrder: []string{"kai-scheduler"},
diff --git a/pkg/bundler/deployer/helm/testdata/kai_scheduler_present/001-kai-scheduler/upstream.env b/pkg/bundler/deployer/helm/testdata/kai_scheduler_present/001-kai-scheduler/upstream.env
index 29f65e84e..b3cad815e 100644
--- a/pkg/bundler/deployer/helm/testdata/kai_scheduler_present/001-kai-scheduler/upstream.env
+++ b/pkg/bundler/deployer/helm/testdata/kai_scheduler_present/001-kai-scheduler/upstream.env
@@ -1,3 +1,3 @@
-CHART='oci://ghcr.io/nvidia/kai-scheduler/kai-scheduler'
+CHART='oci://ghcr.io/kai-scheduler/kai-scheduler/kai-scheduler'
 REPO=''
-VERSION='v0.13.0'
+VERSION='v0.14.1'
diff --git a/pkg/bundler/deployer/helm/testdata/kai_scheduler_present/README.md b/pkg/bundler/deployer/helm/testdata/kai_scheduler_present/README.md
index 776d26421..bb144f929 100644
--- a/pkg/bundler/deployer/helm/testdata/kai_scheduler_present/README.md
+++ b/pkg/bundler/deployer/helm/testdata/kai_scheduler_present/README.md
@@ -18,7 +18,7 @@ via its own `install.sh`:
 
 | Component | Version | Namespace | Source |
 |-----------|---------|-----------|--------|
-| kai-scheduler | v0.13.0 | kai-scheduler | kai-scheduler (oci://ghcr.io/nvidia/kai-scheduler) |
+| kai-scheduler | v0.14.1 | kai-scheduler | kai-scheduler (oci://ghcr.io/kai-scheduler/kai-scheduler) |
 
 
 
diff --git a/recipes/components/kubeflow-trainer/manifests/torch-distributed-cluster-training-runtime.yaml b/recipes/components/kubeflow-trainer/manifests/torch-distributed-cluster-training-runtime.yaml
index 501c0b4f8..012668cd0 100644
--- a/recipes/components/kubeflow-trainer/manifests/torch-distributed-cluster-training-runtime.yaml
+++ b/recipes/components/kubeflow-trainer/manifests/torch-distributed-cluster-training-runtime.yaml
@@ -25,8 +25,11 @@ metadata:
 spec:
   mlPolicy:
     numNodes: 1
-    torch:
-      numProcPerNode: auto
+    # numProcPerNode was removed from mlPolicy.torch in kubeflow-trainer v2.2.0
+    # (kubeflow/trainer#3239); Torch now infers the per-node process count from
+    # the container's nvidia.com/gpu resource limit. mlPolicy.mpi.numProcPerNode
+    # is unaffected.
+    torch: {}
   template:
     spec:
       replicatedJobs:
diff --git a/recipes/overlays/base.yaml b/recipes/overlays/base.yaml
index 7a637b0ea..4acd3decc 100644
--- a/recipes/overlays/base.yaml
+++ b/recipes/overlays/base.yaml
@@ -88,8 +88,8 @@ spec:
 
     - name: kai-scheduler
       type: Helm
-      source: oci://ghcr.io/nvidia/kai-scheduler
-      version: v0.13.0
+      source: oci://ghcr.io/kai-scheduler/kai-scheduler
+      version: v0.14.1
       valuesFile: components/kai-scheduler/values.yaml
       dependencyRefs:
         - gpu-operator
diff --git a/recipes/registry.yaml b/recipes/registry.yaml
index d1b029d0a..ff94a24ba 100644
--- a/recipes/registry.yaml
+++ b/recipes/registry.yaml
@@ -364,9 +364,9 @@ components:
     healthCheck:
       assertFile: checks/kai-scheduler/health-check.yaml
     helm:
-      defaultRepository: oci://ghcr.io/nvidia/kai-scheduler
+      defaultRepository: oci://ghcr.io/kai-scheduler/kai-scheduler
       defaultChart: kai-scheduler
-      defaultVersion: v0.13.0
+      defaultVersion: v0.14.1
       defaultNamespace: kai-scheduler
     nodeScheduling:
       system:
@@ -485,7 +485,7 @@ components:
     helm:
       defaultRepository: oci://ghcr.io/kubeflow/charts
       defaultChart: kubeflow-trainer
-      defaultVersion: 2.1.0
+      defaultVersion: 2.2.0
       defaultNamespace: kubeflow
     nodeScheduling:
       system:
diff --git a/validators/performance/trainer_lifecycle.go b/validators/performance/trainer_lifecycle.go
index 5497dc8d8..65caf0d1e 100644
--- a/validators/performance/trainer_lifecycle.go
+++ b/validators/performance/trainer_lifecycle.go
@@ -47,8 +47,8 @@ import (
 )
 
 const (
-	// trainerArchiveURL is the GitHub tar.gz archive for Kubeflow Trainer v2.1.0.
-	trainerArchiveURL = "https://github.com/kubeflow/trainer/archive/refs/tags/v2.1.0.tar.gz"
+	// trainerArchiveURL is the GitHub tar.gz archive for Kubeflow Trainer v2.2.0.
+	trainerArchiveURL = "https://github.com/kubeflow/trainer/archive/refs/tags/v2.2.0.tar.gz"
 
 	// trainerKustomizePath is the path within the extracted archive to the manager overlay.
 	trainerKustomizePath = "manifests/overlays/manager"
@@ -89,7 +89,7 @@ func isTrainerInstalled(ctx context.Context, dynamicClient dynamic.Interface) (b
 	return true, nil
 }
 
-// installTrainer downloads the Kubeflow Trainer v2.1.0 archive from GitHub, builds the
+// installTrainer downloads the Kubeflow Trainer v2.2.0 archive from GitHub, builds the
 // kustomize manager overlay entirely in Go (no CLI), and applies every resource to the
 // cluster via the dynamic client.  It returns the list of resources it created so the
 // caller can defer deleteTrainer for cleanup.