diff --git a/examples/recipes/aks-training.yaml b/examples/recipes/aks-training.yaml index e980f6d45..f3325670c 100644 --- a/examples/recipes/aks-training.yaml +++ b/examples/recipes/aks-training.yaml @@ -44,7 +44,7 @@ componentRefs: chart: cert-manager type: Helm source: https://charts.jetstack.io - version: v1.17.2 + version: v1.20.2 valuesFile: components/cert-manager/values.yaml - name: gpu-operator namespace: gpu-operator @@ -86,7 +86,7 @@ componentRefs: chart: kube-prometheus-stack type: Helm source: https://prometheus-community.github.io/helm-charts - version: 82.8.0 + version: 84.4.0 valuesFile: components/kube-prometheus-stack/values.yaml overrides: prometheus: @@ -119,13 +119,11 @@ componentRefs: chart: nvsentinel type: Helm source: oci://ghcr.io/nvidia - version: v0.10.0 + version: v1.3.0 valuesFile: components/nvsentinel/values.yaml dependencyRefs: - cert-manager - gpu-operator - manifestFiles: - - components/nvsentinel/manifests/allow-intra-namespace.yaml - name: prometheus-adapter namespace: monitoring chart: prometheus-adapter @@ -140,7 +138,7 @@ componentRefs: chart: skyhook-operator type: Helm source: https://helm.ngc.nvidia.com/nvidia/skyhook - version: v0.13.1 + version: v0.15.1 valuesFile: components/nodewright-operator/values.yaml deploymentOrder: - cert-manager diff --git a/examples/recipes/eks-gb200-ubuntu-training-with-validation.yaml b/examples/recipes/eks-gb200-ubuntu-training-with-validation.yaml index 2239beed5..51d8a2807 100644 --- a/examples/recipes/eks-gb200-ubuntu-training-with-validation.yaml +++ b/examples/recipes/eks-gb200-ubuntu-training-with-validation.yaml @@ -83,7 +83,7 @@ componentRefs: chart: cert-manager type: Helm source: https://charts.jetstack.io - version: v1.17.2 + version: v1.20.2 valuesFile: components/cert-manager/values.yaml - name: gpu-operator @@ -131,7 +131,7 @@ componentRefs: chart: nvsentinel type: Helm source: oci://ghcr.io/nvidia - version: v0.6.0 + version: v1.3.0 valuesFile: components/nvsentinel/values.yaml dependencyRefs: - cert-manager @@ -141,7 +141,7 @@ componentRefs: chart: skyhook-operator type: Helm source: https://helm.ngc.nvidia.com/nvidia/skyhook - version: 0.14.0 + version: v0.15.1 valuesFile: components/nodewright-operator/values.yaml overrides: customization: ubuntu diff --git a/examples/recipes/eks-training.yaml b/examples/recipes/eks-training.yaml index fa51c96f0..9213890d1 100644 --- a/examples/recipes/eks-training.yaml +++ b/examples/recipes/eks-training.yaml @@ -34,7 +34,7 @@ componentRefs: chart: cert-manager type: Helm source: https://charts.jetstack.io - version: v1.17.2 + version: v1.20.2 valuesFile: components/cert-manager/values.yaml - name: gpu-operator namespace: gpu-operator @@ -50,7 +50,7 @@ componentRefs: chart: nvsentinel type: Helm source: oci://ghcr.io/nvidia - version: v0.6.0 + version: v1.3.0 valuesFile: components/nvsentinel/values.yaml dependencyRefs: - cert-manager @@ -59,7 +59,7 @@ componentRefs: chart: skyhook-operator type: Helm source: https://helm.ngc.nvidia.com/nvidia/skyhook - version: 0.14.0 + version: v0.15.1 valuesFile: components/nodewright-operator/values.yaml deploymentOrder: - cert-manager diff --git a/examples/recipes/kind.yaml b/examples/recipes/kind.yaml index 84c5135e7..eded830e4 100644 --- a/examples/recipes/kind.yaml +++ b/examples/recipes/kind.yaml @@ -30,21 +30,21 @@ componentRefs: chart: cert-manager type: Helm source: https://charts.jetstack.io - version: v1.17.2 + version: v1.20.2 valuesFile: components/cert-manager/values.yaml - name: nodewright-operator namespace: skyhook chart: skyhook-operator type: Helm source: https://helm.ngc.nvidia.com/nvidia/skyhook - version: 0.14.0 + version: v0.15.1 valuesFile: components/nodewright-operator/values.yaml - name: kube-prometheus-stack namespace: nvidia-system chart: kube-prometheus-stack type: Helm source: https://prometheus-community.github.io/helm-charts - version: 82.8.0 + version: 84.4.0 valuesFile: components/kube-prometheus-stack/values.yaml - name: k8s-ephemeral-storage-metrics namespace: nvidia-system diff --git a/recipes/overlays/base.yaml b/recipes/overlays/base.yaml index 56dabf98a..7a637b0ea 100644 --- a/recipes/overlays/base.yaml +++ b/recipes/overlays/base.yaml @@ -34,7 +34,7 @@ spec: - name: cert-manager type: Helm source: https://charts.jetstack.io - version: v1.17.2 + version: v1.20.2 valuesFile: components/cert-manager/values.yaml - name: gpu-operator @@ -52,7 +52,7 @@ spec: - name: nvsentinel type: Helm source: oci://ghcr.io/nvidia - version: v1.1.0 + version: v1.3.0 valuesFile: components/nvsentinel/values.yaml dependencyRefs: - cert-manager @@ -61,13 +61,13 @@ spec: - name: nodewright-operator type: Helm source: https://helm.ngc.nvidia.com/nvidia/skyhook - version: v0.14.0 + version: v0.15.1 valuesFile: components/nodewright-operator/values.yaml - name: kube-prometheus-stack type: Helm source: https://prometheus-community.github.io/helm-charts - version: 82.8.0 + version: 84.4.0 valuesFile: components/kube-prometheus-stack/values.yaml - name: k8s-ephemeral-storage-metrics diff --git a/recipes/overlays/eks.yaml b/recipes/overlays/eks.yaml index 173bbe1ba..5128e52dc 100644 --- a/recipes/overlays/eks.yaml +++ b/recipes/overlays/eks.yaml @@ -38,7 +38,7 @@ spec: - name: aws-ebs-csi-driver type: Helm source: https://kubernetes-sigs.github.io/aws-ebs-csi-driver - version: 2.55.0 + version: 2.59.0 valuesFile: components/aws-ebs-csi-driver/values.yaml # Enable Prometheus persistent storage for EKS (requires EBS CSI driver) diff --git a/recipes/registry.yaml b/recipes/registry.yaml index bc7b73c31..d7f71d32d 100644 --- a/recipes/registry.yaml +++ b/recipes/registry.yaml @@ -145,7 +145,7 @@ components: helm: defaultRepository: https://charts.jetstack.io defaultChart: jetstack/cert-manager - defaultVersion: v1.17.2 + defaultVersion: v1.20.2 defaultNamespace: cert-manager nodeScheduling: system: @@ -228,7 +228,7 @@ components: helm: defaultRepository: https://helm.ngc.nvidia.com/nvidia defaultChart: nvidia/nvsentinel - defaultVersion: v1.1.0 + defaultVersion: v1.3.0 defaultNamespace: nvsentinel nodeScheduling: system: @@ -269,7 +269,7 @@ components: helm: defaultRepository: https://prometheus-community.github.io/helm-charts defaultChart: prometheus-community/kube-prometheus-stack - defaultVersion: 82.8.0 + defaultVersion: 84.4.0 defaultNamespace: monitoring nodeScheduling: system: @@ -320,7 +320,7 @@ components: helm: defaultRepository: https://kubernetes-sigs.github.io/aws-ebs-csi-driver defaultChart: aws-ebs-csi-driver/aws-ebs-csi-driver - defaultVersion: 2.55.0 + defaultVersion: 2.59.0 defaultNamespace: kube-system nodeScheduling: system: @@ -461,7 +461,7 @@ components: helm: defaultRepository: oci://registry.k8s.io/kueue/charts defaultChart: kueue - defaultVersion: "0.17.0" + defaultVersion: "0.17.1" defaultNamespace: kueue-system nodeScheduling: system: