Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/user/cli-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -991,7 +991,7 @@ aicr bundle -r recipe.yaml --nodes 8 -o ./bundles

# Day 2 options: workload-gate and workload-selector for nodewright
aicr bundle -r recipe.yaml \
--workload-gate skyhook.io/runtime-required=true:NoSchedule \
--workload-gate skyhook.nvidia.com/runtime-required=true:NoSchedule \
--workload-selector workload-type=training \
-o ./bundles

Expand Down Expand Up @@ -1230,7 +1230,7 @@ aicr bundle -r recipe.yaml \
```shell
# Generate bundle with day 2 options for training workloads
aicr bundle -r recipe.yaml \
--workload-gate skyhook.io/runtime-required=true:NoSchedule \
--workload-gate skyhook.nvidia.com/runtime-required=true:NoSchedule \
--workload-selector workload-type=training \
--workload-selector intent=training \
--accelerated-node-selector accelerator=nvidia-h100 \
Expand Down
6 changes: 3 additions & 3 deletions pkg/bundler/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -831,7 +831,7 @@ func TestDeployerTypeString(t *testing.T) {
func TestWorkloadGateTaintOptions(t *testing.T) {
t.Run("WithWorkloadGateTaint with valid taint", func(t *testing.T) {
taint := &corev1.Taint{
Key: "skyhook.io/runtime-required",
Key: "skyhook.nvidia.com/runtime-required",
Value: "true",
Effect: corev1.TaintEffectNoSchedule,
}
Expand All @@ -841,8 +841,8 @@ func TestWorkloadGateTaintOptions(t *testing.T) {
if got == nil {
t.Fatal("WorkloadGateTaint() returned nil")
}
if got.Key != "skyhook.io/runtime-required" {
t.Errorf("WorkloadGateTaint().Key = %s, want skyhook.io/runtime-required", got.Key)
if got.Key != "skyhook.nvidia.com/runtime-required" {
t.Errorf("WorkloadGateTaint().Key = %s, want skyhook.nvidia.com/runtime-required", got.Key)
}
if got.Value != "true" {
t.Errorf("WorkloadGateTaint().Value = %s, want true", got.Value)
Expand Down
2 changes: 2 additions & 0 deletions pkg/bundler/deployer/argocdhelm/argocdhelm.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,8 @@ func (g *Generator) writeStaticValuesAndBuildStubs(outputDir string) ([]string,
component.RemoveValueByPath(staticValues, path)
component.SetValueByPath(stubs, path, val)
} else {
slog.Warn("dynamic path not found in component values; introducing empty placeholder",
"component", ref.Name, "path", path)
component.SetValueByPath(stubs, path, "")
}
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/bundler/deployer/helm/helm.go
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,8 @@ func writeClusterValuesFile(values map[string]any, dynamicPaths []string, compon
component.RemoveValueByPath(values, path)
} else {
val = ""
slog.Warn("dynamic path not found in component values; introducing empty placeholder",
"component", componentName, "path", path)
}
component.SetValueByPath(clusterValues, path, val)
}
Expand Down
22 changes: 16 additions & 6 deletions pkg/bundler/deployer/helm/templates/README.md.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,15 @@ helm upgrade --install {{ .Name }} {{ .Repository }}/{{ .ChartName }} \
--version {{ .Version }} \
-n {{ .Namespace }} --create-namespace \
-f {{ .Name }}/values.yaml \
-f {{ .Name }}/cluster-values.yaml \
--wait --timeout 10m
{{ else -}}
helm upgrade --install {{ .Name }} {{ .ChartName }} \
--repo {{ .Repository }} \
--version {{ .Version }} \
-n {{ .Namespace }} --create-namespace \
-f {{ .Name }}/values.yaml \
-f {{ .Name }}/cluster-values.yaml \
--wait --timeout 10m
{{ end -}}
```
Expand All @@ -100,19 +102,27 @@ kubectl apply -f {{ .Name }}/manifests/

## Customization

Each Helm component has its own `values.yaml` in its directory.
Edit the file before deploying to customize component configuration:
Each Helm component has two values files in its directory:

```bash
vim gpu-operator/values.yaml
```
- `values.yaml` — resolved configuration from the recipe. Edit to override defaults:

```bash
vim gpu-operator/values.yaml
```

- `cluster-values.yaml` — install-time parameters. Any paths declared with
`aicr bundle --dynamic <component>:<path>` are pulled out of `values.yaml`
and placed here for you to fill in. The file is always created (empty if
no dynamic paths were declared) and passed to `helm upgrade --install`
alongside `values.yaml` by both `deploy.sh` and the per-component commands
in the "Manual Installation" section above. In those commands it is passed
after `values.yaml`, so a value set here takes precedence over the same
path in `values.yaml`.

## Upgrade

To upgrade a specific Helm component:

```bash
helm upgrade <component> <chart> --version <version> -n <namespace> -f <component>/values.yaml --wait --timeout 10m
helm upgrade <component> <chart> --version <version> -n <namespace> -f <component>/values.yaml -f <component>/cluster-values.yaml --wait --timeout 10m
```

## Uninstall
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,15 @@ helm upgrade --install {{ .Name }} {{ .Repository }}/{{ .ChartName }} \
--version {{ .Version }} \
-n {{ .Namespace }} --create-namespace \
-f values.yaml \
-f cluster-values.yaml \
--wait --timeout 10m
{{ else -}}
helm upgrade --install {{ .Name }} {{ .ChartName }} \
--repo {{ .Repository }} \
--version {{ .Version }} \
-n {{ .Namespace }} --create-namespace \
-f values.yaml \
-f cluster-values.yaml \
--wait --timeout 10m
{{ end -}}
```
Expand All @@ -71,13 +73,15 @@ helm upgrade {{ .Name }} {{ .Repository }}/{{ .ChartName }} \
--version {{ .Version }} \
-n {{ .Namespace }} \
-f values.yaml \
-f cluster-values.yaml \
--wait --timeout 10m
{{ else -}}
helm upgrade {{ .Name }} {{ .ChartName }} \
--repo {{ .Repository }} \
--version {{ .Version }} \
-n {{ .Namespace }} \
-f values.yaml \
-f cluster-values.yaml \
--wait --timeout 10m
{{ end -}}
```
Expand Down
4 changes: 2 additions & 2 deletions pkg/snapshotter/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,9 @@ func TestParseTaint(t *testing.T) {
}{
{
name: "taint with key, value, and effect",
taintStr: "skyhook.io/runtime-required=true:NoSchedule",
taintStr: "skyhook.nvidia.com/runtime-required=true:NoSchedule",
want: &corev1.Taint{
Key: "skyhook.io/runtime-required",
Key: "skyhook.nvidia.com/runtime-required",
Value: "true",
Effect: corev1.TaintEffectNoSchedule,
},
Expand Down
Loading