From c5594e7e2364212605ff23ca1a9f9b787ec18860 Mon Sep 17 00:00:00 2001 From: Jack Francis Date: Mon, 4 Jun 2018 17:22:48 -0700 Subject: [PATCH 01/18] check for kubernetesConfig nil (#3164) --- pkg/acsengine/engine.go | 11 ++++++++++- pkg/acsengine/engine_test.go | 37 ++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/pkg/acsengine/engine.go b/pkg/acsengine/engine.go index ff95d5e86c..504a5321e1 100644 --- a/pkg/acsengine/engine.go +++ b/pkg/acsengine/engine.go @@ -63,6 +63,12 @@ func GenerateClusterID(properties *api.Properties) string { // GenerateKubeConfig returns a JSON string representing the KubeConfig func GenerateKubeConfig(properties *api.Properties, location string) (string, error) { + if properties == nil { + return "", fmt.Errorf("Properties nil in GenerateKubeConfig") + } + if properties.CertificateProfile == nil { + return "", fmt.Errorf("CertificateProfile property may not be nil in GenerateKubeConfig") + } b, err := Asset(kubeConfigJSON) if err != nil { return "", fmt.Errorf("error reading kube config template file %s: %s", kubeConfigJSON, err.Error()) @@ -70,7 +76,10 @@ func GenerateKubeConfig(properties *api.Properties, location string) (string, er kubeconfig := string(b) // variable replacement kubeconfig = strings.Replace(kubeconfig, "{{WrapAsVerbatim \"variables('caCertificate')\"}}", base64.StdEncoding.EncodeToString([]byte(properties.CertificateProfile.CaCertificate)), -1) - if properties.OrchestratorProfile.KubernetesConfig.PrivateCluster != nil && helpers.IsTrueBoolPointer(properties.OrchestratorProfile.KubernetesConfig.PrivateCluster.Enabled) { + if properties.OrchestratorProfile != nil && + properties.OrchestratorProfile.KubernetesConfig != nil && + properties.OrchestratorProfile.KubernetesConfig.PrivateCluster != nil && + helpers.IsTrueBoolPointer(properties.OrchestratorProfile.KubernetesConfig.PrivateCluster.Enabled) { if properties.MasterProfile.Count > 1 { // more than 1 master, use the internal lb IP firstMasterIP := net.ParseIP(properties.MasterProfile.FirstConsecutiveStaticIP).To4() diff --git a/pkg/acsengine/engine_test.go b/pkg/acsengine/engine_test.go index e826a2908d..1128a07e64 100644 --- a/pkg/acsengine/engine_test.go +++ b/pkg/acsengine/engine_test.go @@ -596,3 +596,40 @@ func TestGenerateIpList(t *testing.T) { } } } + +func TestGenerateKubeConfig(t *testing.T) { + locale := gotext.NewLocale(path.Join("..", "..", "translations"), "en_US") + i18n.Initialize(locale) + + apiloader := &api.Apiloader{ + Translator: &i18n.Translator{ + Locale: locale, + }, + } + + testData := "./testdata/simple/kubernetes.json" + + containerService, _, err := apiloader.LoadContainerServiceFromFile(testData, true, false, nil) + if err != nil { + t.Fatalf("Failed to load container service from file: %v", err) + } + kubeConfig, err := GenerateKubeConfig(containerService.Properties, "westus2") + // TODO add actual kubeconfig validation + if len(kubeConfig) < 1 { + t.Fatalf("Got unexpected kubeconfig payload: %v", kubeConfig) + } + if err != nil { + t.Fatalf("Failed to call GenerateKubeConfig with simple Kubernetes config from file: %v", testData) + } + + p := api.Properties{} + _, err = GenerateKubeConfig(&p, "westus2") + if err == nil { + t.Fatalf("Expected an error result from nil Properties child properties") + } + + _, err = GenerateKubeConfig(nil, "westus2") + if err == nil { + t.Fatalf("Expected an error result from nil Properties child properties") + } +} From a36be0ce006bea3e569b6c1de3560351fa1009cd Mon Sep 17 00:00:00 2001 From: Lachlan Evenson Date: Tue, 5 Jun 2018 15:16:56 -0700 Subject: [PATCH 02/18] add GPU ExtendedResourceToleration admission controller support --- ...ubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml | 4 ++++ parts/k8s/kubernetesagentcustomdata.yml | 3 +++ parts/k8s/kubernetesbase.t | 5 ++++- pkg/acsengine/defaults-apiserver.go | 2 +- pkg/acsengine/defaults-apiserver_test.go | 2 +- 5 files changed, 13 insertions(+), 3 deletions(-) diff --git a/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml b/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml index 6ed2906463..6b63790647 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml @@ -21,6 +21,10 @@ spec: # This, along with the annotation above marks this pod as a critical add-on. - key: CriticalAddonsOnly operator: Exists + - key: nvidia.com/gpu + effect: NoSchedule + operator: Equal + value: "true" containers: - image: name: nvidia-device-plugin-ctr diff --git a/parts/k8s/kubernetesagentcustomdata.yml b/parts/k8s/kubernetesagentcustomdata.yml index 0dff9d4cac..d9023941c2 100644 --- a/parts/k8s/kubernetesagentcustomdata.yml +++ b/parts/k8s/kubernetesagentcustomdata.yml @@ -137,6 +137,9 @@ write_files: KUBELET_IMAGE={{WrapAsVariable "kubernetesHyperkubeSpec"}} KUBELET_REGISTER_SCHEDULABLE=true KUBELET_NODE_LABELS={{GetAgentKubernetesLabels . "',variables('labelResourceGroup'),'"}} +{{if IsNVIDIADevicePluginEnabled}} + KUBELET_REGISTER_WITH_TAINTS=--register-with-taints={{WrapAsVariable "registerWithGpuTaints"}} + {{end}} AGENT_ARTIFACTS_CONFIG_PLACEHOLDER diff --git a/parts/k8s/kubernetesbase.t b/parts/k8s/kubernetesbase.t index e387be32c6..7462252923 100644 --- a/parts/k8s/kubernetesbase.t +++ b/parts/k8s/kubernetesbase.t @@ -41,6 +41,9 @@ {{range $index, $agent := .AgentPoolProfiles}} "{{.Name}}Index": {{$index}}, {{template "k8s/kubernetesagentvars.t" .}} + {{if IsNVIDIADevicePluginEnabled }} + "registerWithGpuTaints": "nvidia.com/gpu=true:NoSchedule", + {{end}} {{if .IsStorageAccount}} {{if .HasDisks}} "{{.Name}}DataAccountName": "[concat(variables('storageAccountBaseName'), 'data{{$index}}')]", @@ -184,4 +187,4 @@ {{end}} } -} \ No newline at end of file +} diff --git a/pkg/acsengine/defaults-apiserver.go b/pkg/acsengine/defaults-apiserver.go index 3ce272e87d..df31882189 100644 --- a/pkg/acsengine/defaults-apiserver.go +++ b/pkg/acsengine/defaults-apiserver.go @@ -161,7 +161,7 @@ func getDefaultAdmissionControls(cs *api.ContainerService) (string, string) { // Add new version case when applying admission controllers only available in that version or later switch { case common.IsKubernetesVersionGe(o.OrchestratorVersion, "1.9.0"): - admissionControlValues = "NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ResourceQuota,DenyEscalatingExec,AlwaysPullImages" + admissionControlValues = "NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ResourceQuota,DenyEscalatingExec,AlwaysPullImages,ExtendedResourceToleration" default: admissionControlValues = "NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota,DenyEscalatingExec,AlwaysPullImages" } diff --git a/pkg/acsengine/defaults-apiserver_test.go b/pkg/acsengine/defaults-apiserver_test.go index 58c295198c..214310340a 100644 --- a/pkg/acsengine/defaults-apiserver_test.go +++ b/pkg/acsengine/defaults-apiserver_test.go @@ -298,7 +298,7 @@ func TestAPIServerConfigDefaultAdmissionControls(t *testing.T) { admissonControlKey := "--admission-control" cs := createContainerService("testcluster", version, 3, 2) cs.Properties.OrchestratorProfile.KubernetesConfig.APIServerConfig = map[string]string{} - cs.Properties.OrchestratorProfile.KubernetesConfig.APIServerConfig[admissonControlKey] = "NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ResourceQuota,DenyEscalatingExec,AlwaysPullImages" + cs.Properties.OrchestratorProfile.KubernetesConfig.APIServerConfig[admissonControlKey] = "NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ResourceQuota,DenyEscalatingExec,AlwaysPullImages,ExtendedResourceToleration" setAPIServerConfig(cs) a := cs.Properties.OrchestratorProfile.KubernetesConfig.APIServerConfig From f39f29ba9605959a7dfedcc083beb8055effb17b Mon Sep 17 00:00:00 2001 From: Lachlan Evenson Date: Tue, 5 Jun 2018 15:18:32 -0700 Subject: [PATCH 03/18] Revert "check for kubernetesConfig nil (#3164)" This reverts commit c5594e7e2364212605ff23ca1a9f9b787ec18860. --- pkg/acsengine/engine.go | 11 +---------- pkg/acsengine/engine_test.go | 37 ------------------------------------ 2 files changed, 1 insertion(+), 47 deletions(-) diff --git a/pkg/acsengine/engine.go b/pkg/acsengine/engine.go index 504a5321e1..ff95d5e86c 100644 --- a/pkg/acsengine/engine.go +++ b/pkg/acsengine/engine.go @@ -63,12 +63,6 @@ func GenerateClusterID(properties *api.Properties) string { // GenerateKubeConfig returns a JSON string representing the KubeConfig func GenerateKubeConfig(properties *api.Properties, location string) (string, error) { - if properties == nil { - return "", fmt.Errorf("Properties nil in GenerateKubeConfig") - } - if properties.CertificateProfile == nil { - return "", fmt.Errorf("CertificateProfile property may not be nil in GenerateKubeConfig") - } b, err := Asset(kubeConfigJSON) if err != nil { return "", fmt.Errorf("error reading kube config template file %s: %s", kubeConfigJSON, err.Error()) @@ -76,10 +70,7 @@ func GenerateKubeConfig(properties *api.Properties, location string) (string, er kubeconfig := string(b) // variable replacement kubeconfig = strings.Replace(kubeconfig, "{{WrapAsVerbatim \"variables('caCertificate')\"}}", base64.StdEncoding.EncodeToString([]byte(properties.CertificateProfile.CaCertificate)), -1) - if properties.OrchestratorProfile != nil && - properties.OrchestratorProfile.KubernetesConfig != nil && - properties.OrchestratorProfile.KubernetesConfig.PrivateCluster != nil && - helpers.IsTrueBoolPointer(properties.OrchestratorProfile.KubernetesConfig.PrivateCluster.Enabled) { + if properties.OrchestratorProfile.KubernetesConfig.PrivateCluster != nil && helpers.IsTrueBoolPointer(properties.OrchestratorProfile.KubernetesConfig.PrivateCluster.Enabled) { if properties.MasterProfile.Count > 1 { // more than 1 master, use the internal lb IP firstMasterIP := net.ParseIP(properties.MasterProfile.FirstConsecutiveStaticIP).To4() diff --git a/pkg/acsengine/engine_test.go b/pkg/acsengine/engine_test.go index 1128a07e64..e826a2908d 100644 --- a/pkg/acsengine/engine_test.go +++ b/pkg/acsengine/engine_test.go @@ -596,40 +596,3 @@ func TestGenerateIpList(t *testing.T) { } } } - -func TestGenerateKubeConfig(t *testing.T) { - locale := gotext.NewLocale(path.Join("..", "..", "translations"), "en_US") - i18n.Initialize(locale) - - apiloader := &api.Apiloader{ - Translator: &i18n.Translator{ - Locale: locale, - }, - } - - testData := "./testdata/simple/kubernetes.json" - - containerService, _, err := apiloader.LoadContainerServiceFromFile(testData, true, false, nil) - if err != nil { - t.Fatalf("Failed to load container service from file: %v", err) - } - kubeConfig, err := GenerateKubeConfig(containerService.Properties, "westus2") - // TODO add actual kubeconfig validation - if len(kubeConfig) < 1 { - t.Fatalf("Got unexpected kubeconfig payload: %v", kubeConfig) - } - if err != nil { - t.Fatalf("Failed to call GenerateKubeConfig with simple Kubernetes config from file: %v", testData) - } - - p := api.Properties{} - _, err = GenerateKubeConfig(&p, "westus2") - if err == nil { - t.Fatalf("Expected an error result from nil Properties child properties") - } - - _, err = GenerateKubeConfig(nil, "westus2") - if err == nil { - t.Fatalf("Expected an error result from nil Properties child properties") - } -} From e9f061ea77452720470f360f5a1349c485835a7a Mon Sep 17 00:00:00 2001 From: Lachlan Evenson Date: Tue, 5 Jun 2018 15:27:55 -0700 Subject: [PATCH 04/18] Update condition to match review --- parts/k8s/kubernetesagentcustomdata.yml | 2 +- parts/k8s/kubernetesbase.t | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/parts/k8s/kubernetesagentcustomdata.yml b/parts/k8s/kubernetesagentcustomdata.yml index d9023941c2..6af49c859b 100644 --- a/parts/k8s/kubernetesagentcustomdata.yml +++ b/parts/k8s/kubernetesagentcustomdata.yml @@ -137,7 +137,7 @@ write_files: KUBELET_IMAGE={{WrapAsVariable "kubernetesHyperkubeSpec"}} KUBELET_REGISTER_SCHEDULABLE=true KUBELET_NODE_LABELS={{GetAgentKubernetesLabels . "',variables('labelResourceGroup'),'"}} -{{if IsNVIDIADevicePluginEnabled}} +{{if IsNSeriesSKU .}} KUBELET_REGISTER_WITH_TAINTS=--register-with-taints={{WrapAsVariable "registerWithGpuTaints"}} {{end}} diff --git a/parts/k8s/kubernetesbase.t b/parts/k8s/kubernetesbase.t index 7462252923..d5c416a74b 100644 --- a/parts/k8s/kubernetesbase.t +++ b/parts/k8s/kubernetesbase.t @@ -41,7 +41,7 @@ {{range $index, $agent := .AgentPoolProfiles}} "{{.Name}}Index": {{$index}}, {{template "k8s/kubernetesagentvars.t" .}} - {{if IsNVIDIADevicePluginEnabled }} + {{if IsNSeriesSKU .}} "registerWithGpuTaints": "nvidia.com/gpu=true:NoSchedule", {{end}} {{if .IsStorageAccount}} From 2511e6f2b3c399af1eb08c41da1ccbf7eda6fb2c Mon Sep 17 00:00:00 2001 From: Lachlan Evenson Date: Tue, 5 Jun 2018 22:47:42 -0700 Subject: [PATCH 05/18] Update conditionals to match both N series and device plugin BUGFIX - Update docker runtime on NON GPU enabled machines Add accelerator label to GPU enabled nodes Add nodeselector label match where accelerator=nvidia for nvidia-device-plugin --- ...bernetesmasteraddons-nvidia-device-plugin-daemonset.yaml | 1 + parts/k8s/kubernetesagentcustomdata.yml | 6 ++++-- parts/k8s/kubernetesbase.t | 2 ++ pkg/acsengine/template_generator.go | 4 ++++ 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml b/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml index 6b63790647..39bbd1a99c 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml @@ -41,3 +41,4 @@ spec: path: /var/lib/kubelet/device-plugins nodeSelector: beta.kubernetes.io/os: linux + accelerator: nvidia diff --git a/parts/k8s/kubernetesagentcustomdata.yml b/parts/k8s/kubernetesagentcustomdata.yml index 6af49c859b..e8fc7c7ab1 100644 --- a/parts/k8s/kubernetesagentcustomdata.yml +++ b/parts/k8s/kubernetesagentcustomdata.yml @@ -34,14 +34,14 @@ write_files: "log-opts": { "max-size": "50m", "max-file": "5" - }{{if IsNVIDIADevicePluginEnabled}} + }{{if IsNSeriesSKU .}}{{if IsNVIDIADevicePluginEnabled}} ,"default-runtime": "nvidia", "runtimes": { "nvidia": { "path": "/usr/bin/nvidia-container-runtime", "runtimeArgs": [] } - }{{end}} + }{{end}}{{end}} } - path: "/etc/kubernetes/certs/ca.crt" @@ -138,8 +138,10 @@ write_files: KUBELET_REGISTER_SCHEDULABLE=true KUBELET_NODE_LABELS={{GetAgentKubernetesLabels . "',variables('labelResourceGroup'),'"}} {{if IsNSeriesSKU .}} + {{if IsNVIDIADevicePluginEnabled}} KUBELET_REGISTER_WITH_TAINTS=--register-with-taints={{WrapAsVariable "registerWithGpuTaints"}} {{end}} +{{end}} AGENT_ARTIFACTS_CONFIG_PLACEHOLDER diff --git a/parts/k8s/kubernetesbase.t b/parts/k8s/kubernetesbase.t index d5c416a74b..8a4679245c 100644 --- a/parts/k8s/kubernetesbase.t +++ b/parts/k8s/kubernetesbase.t @@ -42,7 +42,9 @@ "{{.Name}}Index": {{$index}}, {{template "k8s/kubernetesagentvars.t" .}} {{if IsNSeriesSKU .}} + {{if IsNVIDIADevicePluginEnabled}} "registerWithGpuTaints": "nvidia.com/gpu=true:NoSchedule", + {{end}} {{end}} {{if .IsStorageAccount}} {{if .HasDisks}} diff --git a/pkg/acsengine/template_generator.go b/pkg/acsengine/template_generator.go index b11f3aff5b..7ec9230a2f 100644 --- a/pkg/acsengine/template_generator.go +++ b/pkg/acsengine/template_generator.go @@ -198,6 +198,10 @@ func (t *TemplateGenerator) getTemplateFuncMap(cs *api.ContainerService) templat storagetier, _ := getStorageAccountType(profile.VMSize) buf.WriteString(fmt.Sprintf(",storageprofile=managed,storagetier=%s", storagetier)) } + if isNSeriesSKU(profile) { + accelerator := "nvidia" + buf.WriteString(fmt.Sprintf(",accelerator=%s", accelerator)) + } buf.WriteString(fmt.Sprintf(",kubernetes.azure.com/cluster=%s", rg)) for k, v := range profile.CustomNodeLabels { buf.WriteString(fmt.Sprintf(",%s=%s", k, v)) From c9e6dc12d0082b1ce9e16f251bc65e33687ee954 Mon Sep 17 00:00:00 2001 From: Lachlan Evenson Date: Tue, 5 Jun 2018 23:24:10 -0700 Subject: [PATCH 06/18] Add toleration to run kube-proxy on tainted gpu nodes --- .../addons/kubernetesmasteraddons-kube-proxy-daemonset.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/parts/k8s/addons/kubernetesmasteraddons-kube-proxy-daemonset.yaml b/parts/k8s/addons/kubernetesmasteraddons-kube-proxy-daemonset.yaml index 1a2adfc837..b07dddf6f2 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-kube-proxy-daemonset.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-kube-proxy-daemonset.yaml @@ -19,6 +19,10 @@ spec: operator: Equal value: "true" effect: NoSchedule + - key: nvidia.com/gpu + operator: Equal + value: "true" + effect: NoSchedule containers: - command: - "/hyperkube" From 9e8fa642cbc0d371a5f4b33850d8a9a3aea81b7c Mon Sep 17 00:00:00 2001 From: Lachlan Evenson Date: Wed, 6 Jun 2018 12:38:09 -0700 Subject: [PATCH 07/18] Update Nvidia gpu device plugin to upstream ds example --- ...addons-nvidia-device-plugin-daemonset.yaml | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml b/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml index 39bbd1a99c..1b9c571ff6 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml @@ -1,24 +1,27 @@ -apiVersion: extensions/v1beta1 +apiVersion: apps/v1 kind: DaemonSet metadata: labels: + k8s-app: nvidia-gpu-device-plugin kubernetes.io/cluster-service: "true" - name: nvidia-device-plugin + addonmanager.kubernetes.io/mode: Reconcile + name: nvidia-gpu-device-plugin namespace: kube-system spec: + selector: + matchLabels: + k8s-app: nvidia-gpu-device-plugin + updateStrategy: + type: RollingUpdate template: metadata: - # Mark this pod as a critical add-on; when enabled, the critical add-on scheduler - # reserves resources for critical add-on pods so that they can be rescheduled after - # a failure. This annotation works in tandem with the toleration below. annotations: scheduler.alpha.kubernetes.io/critical-pod: "" labels: - name: nvidia-device-plugin-ds + k8s-app: nvidia-gpu-device-plugin spec: + priorityClassName: system-node-critical tolerations: - # Allow this pod to be rescheduled while the node is in "critical add-ons only" mode. - # This, along with the annotation above marks this pod as a critical add-on. - key: CriticalAddonsOnly operator: Exists - key: nvidia.com/gpu From 50b46a4291e010b1b7a7c528b831f644d2d631c8 Mon Sep 17 00:00:00 2001 From: Lachlan Evenson Date: Wed, 6 Jun 2018 16:25:43 -0700 Subject: [PATCH 08/18] Update docs Update file name to reflect nvidia-gpu Convert nodeSelector to affinity --- ...steraddons-nvidia-gpu-device-plugin-daemonset.yaml} | 10 +++++++++- parts/k8s/kubernetesmastercustomdata.yml | 2 +- pkg/acsengine/addons.go | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) rename parts/k8s/addons/{kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml => kubernetesmasteraddons-nvidia-gpu-device-plugin-daemonset.yaml} (82%) diff --git a/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml b/parts/k8s/addons/kubernetesmasteraddons-nvidia-gpu-device-plugin-daemonset.yaml similarity index 82% rename from parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml rename to parts/k8s/addons/kubernetesmasteraddons-nvidia-gpu-device-plugin-daemonset.yaml index 1b9c571ff6..45297e7b53 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-nvidia-gpu-device-plugin-daemonset.yaml @@ -21,6 +21,15 @@ spec: k8s-app: nvidia-gpu-device-plugin spec: priorityClassName: system-node-critical + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: accelerator + operator: In + values: + - nvidia tolerations: - key: CriticalAddonsOnly operator: Exists @@ -44,4 +53,3 @@ spec: path: /var/lib/kubelet/device-plugins nodeSelector: beta.kubernetes.io/os: linux - accelerator: nvidia diff --git a/parts/k8s/kubernetesmastercustomdata.yml b/parts/k8s/kubernetesmastercustomdata.yml index 1bbf9b4172..366a687a80 100644 --- a/parts/k8s/kubernetesmastercustomdata.yml +++ b/parts/k8s/kubernetesmastercustomdata.yml @@ -281,7 +281,7 @@ MASTER_ARTIFACTS_CONFIG_PLACEHOLDER {{end}} {{if IsNVIDIADevicePluginEnabled}} - sed -i "s||{{WrapAsVariable "kubernetesNVIDIADevicePluginSpec"}}|g" "/etc/kubernetes/addons/nvidia-device-plugin.yaml" + sed -i "s||{{WrapAsVariable "kubernetesNVIDIADevicePluginSpec"}}|g" "/etc/kubernetes/addons/nvidia-gpu-device-plugin.yaml" {{end}} {{if EnableDataEncryptionAtRest }} diff --git a/pkg/acsengine/addons.go b/pkg/acsengine/addons.go index bf216b4081..b0b3998ab9 100644 --- a/pkg/acsengine/addons.go +++ b/pkg/acsengine/addons.go @@ -33,8 +33,8 @@ func kubernetesAddonSettingsInit(profile *api.Properties) []kubernetesFeatureSet true, }, { - "kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml", - "nvidia-device-plugin.yaml", + "kubernetesmasteraddons-nvidia-gpu-device-plugin-daemonset.yaml", + "nvidia-gpu-device-plugin.yaml", profile.IsNVIDIADevicePluginEnabled(), }, { From a8df24571cdfc88f0cbdc6421522762ba6186f27 Mon Sep 17 00:00:00 2001 From: Lachlan Evenson Date: Thu, 7 Jun 2018 10:01:53 -0700 Subject: [PATCH 09/18] Add support for 1.11 Remove gpu from the resource name to fall in line with upstream --- ...ddons-nvidia-device-plugin-daemonset.yaml} | 8 +- parts/k8s/kubernetesmastercustomdata.yml | 2 +- pkg/acsengine/addons.go | 4 +- pkg/acsengine/k8s_versions.go | 90 ++++++++++--------- 4 files changed, 53 insertions(+), 51 deletions(-) rename parts/k8s/addons/{kubernetesmasteraddons-nvidia-gpu-device-plugin-daemonset.yaml => kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml} (89%) diff --git a/parts/k8s/addons/kubernetesmasteraddons-nvidia-gpu-device-plugin-daemonset.yaml b/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml similarity index 89% rename from parts/k8s/addons/kubernetesmasteraddons-nvidia-gpu-device-plugin-daemonset.yaml rename to parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml index 45297e7b53..63fa6ac52c 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-nvidia-gpu-device-plugin-daemonset.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml @@ -2,15 +2,15 @@ apiVersion: apps/v1 kind: DaemonSet metadata: labels: - k8s-app: nvidia-gpu-device-plugin + k8s-app: nvidia-device-plugin kubernetes.io/cluster-service: "true" addonmanager.kubernetes.io/mode: Reconcile - name: nvidia-gpu-device-plugin + name: nvidia-device-plugin namespace: kube-system spec: selector: matchLabels: - k8s-app: nvidia-gpu-device-plugin + k8s-app: nvidia-device-plugin updateStrategy: type: RollingUpdate template: @@ -18,7 +18,7 @@ spec: annotations: scheduler.alpha.kubernetes.io/critical-pod: "" labels: - k8s-app: nvidia-gpu-device-plugin + k8s-app: nvidia-device-plugin spec: priorityClassName: system-node-critical affinity: diff --git a/parts/k8s/kubernetesmastercustomdata.yml b/parts/k8s/kubernetesmastercustomdata.yml index 366a687a80..1bbf9b4172 100644 --- a/parts/k8s/kubernetesmastercustomdata.yml +++ b/parts/k8s/kubernetesmastercustomdata.yml @@ -281,7 +281,7 @@ MASTER_ARTIFACTS_CONFIG_PLACEHOLDER {{end}} {{if IsNVIDIADevicePluginEnabled}} - sed -i "s||{{WrapAsVariable "kubernetesNVIDIADevicePluginSpec"}}|g" "/etc/kubernetes/addons/nvidia-gpu-device-plugin.yaml" + sed -i "s||{{WrapAsVariable "kubernetesNVIDIADevicePluginSpec"}}|g" "/etc/kubernetes/addons/nvidia-device-plugin.yaml" {{end}} {{if EnableDataEncryptionAtRest }} diff --git a/pkg/acsengine/addons.go b/pkg/acsengine/addons.go index b0b3998ab9..bf216b4081 100644 --- a/pkg/acsengine/addons.go +++ b/pkg/acsengine/addons.go @@ -33,8 +33,8 @@ func kubernetesAddonSettingsInit(profile *api.Properties) []kubernetesFeatureSet true, }, { - "kubernetesmasteraddons-nvidia-gpu-device-plugin-daemonset.yaml", - "nvidia-gpu-device-plugin.yaml", + "kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml", + "nvidia-device-plugin.yaml", profile.IsNVIDIADevicePluginEnabled(), }, { diff --git a/pkg/acsengine/k8s_versions.go b/pkg/acsengine/k8s_versions.go index e043c73fa5..88ffaee036 100644 --- a/pkg/acsengine/k8s_versions.go +++ b/pkg/acsengine/k8s_versions.go @@ -9,31 +9,32 @@ import ( var k8sComponentVersions = map[string]map[string]string{ "1.11": { - "dockerEngine": "1.13.*", - "dashboard": "kubernetes-dashboard-amd64:v1.8.3", - "exechealthz": "exechealthz-amd64:1.2", - "addon-resizer": "addon-resizer:1.8.1", - "heapster": "heapster-amd64:v1.5.1", - "metrics-server": "metrics-server-amd64:v0.2.1", - "kube-dns": "k8s-dns-kube-dns-amd64:1.14.8", - "addon-manager": "kube-addon-manager-amd64:v8.6", - "dnsmasq": "k8s-dns-dnsmasq-nanny-amd64:1.14.8", - "pause": "pause-amd64:3.1", - "tiller": "tiller:v2.8.1", - "rescheduler": "rescheduler:v0.3.1", - "aci-connector": "virtual-kubelet:latest", - "nodestatusfreq": DefaultKubernetesNodeStatusUpdateFrequency, - "nodegraceperiod": DefaultKubernetesCtrlMgrNodeMonitorGracePeriod, - "podeviction": DefaultKubernetesCtrlMgrPodEvictionTimeout, - "routeperiod": DefaultKubernetesCtrlMgrRouteReconciliationPeriod, - "backoffretries": strconv.Itoa(DefaultKubernetesCloudProviderBackoffRetries), - "backoffjitter": strconv.FormatFloat(DefaultKubernetesCloudProviderBackoffJitter, 'f', -1, 64), - "backoffduration": strconv.Itoa(DefaultKubernetesCloudProviderBackoffDuration), - "backoffexponent": strconv.FormatFloat(DefaultKubernetesCloudProviderBackoffExponent, 'f', -1, 64), - "ratelimitqps": strconv.FormatFloat(DefaultKubernetesCloudProviderRateLimitQPS, 'f', -1, 64), - "ratelimitbucket": strconv.Itoa(DefaultKubernetesCloudProviderRateLimitBucket), - "gchighthreshold": strconv.Itoa(DefaultKubernetesGCHighThreshold), - "gclowthreshold": strconv.Itoa(DefaultKubernetesGCLowThreshold), + "dockerEngine": "1.13.*", + "dashboard": "kubernetes-dashboard-amd64:v1.8.3", + "exechealthz": "exechealthz-amd64:1.2", + "addon-resizer": "addon-resizer:1.8.1", + "heapster": "heapster-amd64:v1.5.1", + "metrics-server": "metrics-server-amd64:v0.2.1", + "kube-dns": "k8s-dns-kube-dns-amd64:1.14.8", + "addon-manager": "kube-addon-manager-amd64:v8.6", + "dnsmasq": "k8s-dns-dnsmasq-nanny-amd64:1.14.8", + "pause": "pause-amd64:3.1", + "tiller": "tiller:v2.8.1", + "rescheduler": "rescheduler:v0.3.1", + "aci-connector": "virtual-kubelet:latest", + "nvidia-device-plugin": "k8s-device-plugin:1.10", + "nodestatusfreq": DefaultKubernetesNodeStatusUpdateFrequency, + "nodegraceperiod": DefaultKubernetesCtrlMgrNodeMonitorGracePeriod, + "podeviction": DefaultKubernetesCtrlMgrPodEvictionTimeout, + "routeperiod": DefaultKubernetesCtrlMgrRouteReconciliationPeriod, + "backoffretries": strconv.Itoa(DefaultKubernetesCloudProviderBackoffRetries), + "backoffjitter": strconv.FormatFloat(DefaultKubernetesCloudProviderBackoffJitter, 'f', -1, 64), + "backoffduration": strconv.Itoa(DefaultKubernetesCloudProviderBackoffDuration), + "backoffexponent": strconv.FormatFloat(DefaultKubernetesCloudProviderBackoffExponent, 'f', -1, 64), + "ratelimitqps": strconv.FormatFloat(DefaultKubernetesCloudProviderRateLimitQPS, 'f', -1, 64), + "ratelimitbucket": strconv.Itoa(DefaultKubernetesCloudProviderRateLimitBucket), + "gchighthreshold": strconv.Itoa(DefaultKubernetesGCHighThreshold), + "gclowthreshold": strconv.Itoa(DefaultKubernetesGCLowThreshold), }, "1.10": { "dockerEngine": "1.13.*", @@ -243,25 +244,26 @@ func getK8sVersionComponents(version string, overrides map[string]string) map[st "addonresizer": k8sComponentVersions["1.11"]["addon-resizer"], "heapster": k8sComponentVersions["1.11"]["heapster"], DefaultMetricsServerAddonName: k8sComponentVersions["1.11"]["metrics-server"], - "dns": k8sComponentVersions["1.11"]["kube-dns"], - "addonmanager": k8sComponentVersions["1.11"]["addon-manager"], - "dnsmasq": k8sComponentVersions["1.11"]["dnsmasq"], - "pause": k8sComponentVersions["1.11"]["pause"], - DefaultTillerAddonName: k8sComponentVersions["1.11"]["tiller"], - DefaultReschedulerAddonName: k8sComponentVersions["1.11"]["rescheduler"], - DefaultACIConnectorAddonName: k8sComponentVersions["1.11"]["aci-connector"], - "nodestatusfreq": k8sComponentVersions["1.11"]["nodestatusfreq"], - "nodegraceperiod": k8sComponentVersions["1.11"]["nodegraceperiod"], - "podeviction": k8sComponentVersions["1.11"]["podeviction"], - "routeperiod": k8sComponentVersions["1.11"]["routeperiod"], - "backoffretries": k8sComponentVersions["1.11"]["backoffretries"], - "backoffjitter": k8sComponentVersions["1.11"]["backoffjitter"], - "backoffduration": k8sComponentVersions["1.11"]["backoffduration"], - "backoffexponent": k8sComponentVersions["1.11"]["backoffexponent"], - "ratelimitqps": k8sComponentVersions["1.11"]["ratelimitqps"], - "ratelimitbucket": k8sComponentVersions["1.11"]["ratelimitbucket"], - "gchighthreshold": k8sComponentVersions["1.11"]["gchighthreshold"], - "gclowthreshold": k8sComponentVersions["1.11"]["gclowthreshold"], + "dns": k8sComponentVersions["1.11"]["kube-dns"], + "addonmanager": k8sComponentVersions["1.11"]["addon-manager"], + "dnsmasq": k8sComponentVersions["1.11"]["dnsmasq"], + "pause": k8sComponentVersions["1.11"]["pause"], + DefaultTillerAddonName: k8sComponentVersions["1.11"]["tiller"], + DefaultReschedulerAddonName: k8sComponentVersions["1.11"]["rescheduler"], + DefaultACIConnectorAddonName: k8sComponentVersions["1.11"]["aci-connector"], + "nodestatusfreq": k8sComponentVersions["1.11"]["nodestatusfreq"], + "nodegraceperiod": k8sComponentVersions["1.11"]["nodegraceperiod"], + "podeviction": k8sComponentVersions["1.11"]["podeviction"], + "routeperiod": k8sComponentVersions["1.11"]["routeperiod"], + "backoffretries": k8sComponentVersions["1.11"]["backoffretries"], + "backoffjitter": k8sComponentVersions["1.11"]["backoffjitter"], + "backoffduration": k8sComponentVersions["1.11"]["backoffduration"], + "backoffexponent": k8sComponentVersions["1.11"]["backoffexponent"], + "ratelimitqps": k8sComponentVersions["1.11"]["ratelimitqps"], + "ratelimitbucket": k8sComponentVersions["1.11"]["ratelimitbucket"], + "gchighthreshold": k8sComponentVersions["1.11"]["gchighthreshold"], + "gclowthreshold": k8sComponentVersions["1.11"]["gclowthreshold"], + DefaultNVIDIADevicePluginAddonName: k8sComponentVersions["1.11"]["nvidia-device-plugin"], } case "1.10": ret = map[string]string{ From 83034c127fec7c58f8ee88f5e40399c5d88a952a Mon Sep 17 00:00:00 2001 From: Jack Francis Date: Thu, 7 Jun 2018 10:58:01 -0700 Subject: [PATCH 10/18] cleanup k8s_version_test --- pkg/acsengine/const.go | 4 +- pkg/acsengine/defaults.go | 8 +- pkg/acsengine/k8s_versions.go | 188 ++++++++++++++-------------- pkg/acsengine/k8s_versions_test.go | 44 +++++++ pkg/acsengine/params_k8s.go | 6 +- pkg/acsengine/template_generator.go | 6 +- pkg/api/const.go | 4 +- pkg/api/types.go | 2 +- pkg/api/types_test.go | 2 +- 9 files changed, 154 insertions(+), 110 deletions(-) diff --git a/pkg/acsengine/const.go b/pkg/acsengine/const.go index 73ad77b1c0..0ff923f1af 100644 --- a/pkg/acsengine/const.go +++ b/pkg/acsengine/const.go @@ -136,8 +136,8 @@ const ( DefaultReschedulerAddonName = "rescheduler" // DefaultMetricsServerAddonName is the name of the kubernetes Metrics server addon deployment DefaultMetricsServerAddonName = "metrics-server" - // DefaultNVIDIADevicePluginAddonName is the name of the kubernetes NVIDIA Device Plugin daemon set - DefaultNVIDIADevicePluginAddonName = "nvidia-device-plugin" + // NVIDIADevicePluginAddonName is the name of the kubernetes NVIDIA Device Plugin daemon set + NVIDIADevicePluginAddonName = "nvidia-device-plugin" // DefaultKubernetesKubeletMaxPods is the max pods per kubelet DefaultKubernetesKubeletMaxPods = 110 // DefaultMasterEtcdServerPort is the default etcd server port for Kubernetes master nodes diff --git a/pkg/acsengine/defaults.go b/pkg/acsengine/defaults.go index 1e8ca798ab..653d463878 100644 --- a/pkg/acsengine/defaults.go +++ b/pkg/acsengine/defaults.go @@ -306,10 +306,10 @@ var ( // DefaultNVIDIADevicePluginAddonsConfig is the default NVIDIA Device Plugin Kubernetes addon Config DefaultNVIDIADevicePluginAddonsConfig = api.KubernetesAddon{ - Name: DefaultNVIDIADevicePluginAddonName, + Name: NVIDIADevicePluginAddonName, Containers: []api.KubernetesContainerSpec{ { - Name: DefaultNVIDIADevicePluginAddonName, + Name: NVIDIADevicePluginAddonName, }, }, } @@ -420,7 +420,7 @@ func setOrchestratorDefaults(cs *api.ContainerService) { m = getAddonsIndexByName(o.KubernetesConfig.Addons, DefaultMetricsServerAddonName) o.KubernetesConfig.Addons[m].Enabled = k8sVersionMetricsServerAddonEnabled(o) } - n := getAddonsIndexByName(o.KubernetesConfig.Addons, DefaultNVIDIADevicePluginAddonName) + n := getAddonsIndexByName(o.KubernetesConfig.Addons, NVIDIADevicePluginAddonName) if n < 0 { // Provide default acs-engine config for NVIDIA Device Plugin o.KubernetesConfig.Addons = append(o.KubernetesConfig.Addons, DefaultNVIDIADevicePluginAddonsConfig) @@ -520,7 +520,7 @@ func setOrchestratorDefaults(cs *api.ContainerService) { if a.OrchestratorProfile.KubernetesConfig.Addons[m].IsEnabled(api.DefaultMetricsServerAddonEnabled) { a.OrchestratorProfile.KubernetesConfig.Addons[m] = assignDefaultAddonVals(a.OrchestratorProfile.KubernetesConfig.Addons[m], DefaultMetricsServerAddonsConfig) } - n := getAddonsIndexByName(a.OrchestratorProfile.KubernetesConfig.Addons, DefaultNVIDIADevicePluginAddonName) + n := getAddonsIndexByName(a.OrchestratorProfile.KubernetesConfig.Addons, NVIDIADevicePluginAddonName) if a.OrchestratorProfile.KubernetesConfig.Addons[n].IsEnabled(api.DefaultNVIDIADevicePluginAddonEnabled) { a.OrchestratorProfile.KubernetesConfig.Addons[n] = assignDefaultAddonVals(a.OrchestratorProfile.KubernetesConfig.Addons[n], DefaultNVIDIADevicePluginAddonsConfig) } diff --git a/pkg/acsengine/k8s_versions.go b/pkg/acsengine/k8s_versions.go index 88ffaee036..2a77028724 100644 --- a/pkg/acsengine/k8s_versions.go +++ b/pkg/acsengine/k8s_versions.go @@ -9,61 +9,61 @@ import ( var k8sComponentVersions = map[string]map[string]string{ "1.11": { - "dockerEngine": "1.13.*", - "dashboard": "kubernetes-dashboard-amd64:v1.8.3", - "exechealthz": "exechealthz-amd64:1.2", - "addon-resizer": "addon-resizer:1.8.1", - "heapster": "heapster-amd64:v1.5.1", - "metrics-server": "metrics-server-amd64:v0.2.1", - "kube-dns": "k8s-dns-kube-dns-amd64:1.14.8", - "addon-manager": "kube-addon-manager-amd64:v8.6", - "dnsmasq": "k8s-dns-dnsmasq-nanny-amd64:1.14.8", - "pause": "pause-amd64:3.1", - "tiller": "tiller:v2.8.1", - "rescheduler": "rescheduler:v0.3.1", - "aci-connector": "virtual-kubelet:latest", - "nvidia-device-plugin": "k8s-device-plugin:1.10", - "nodestatusfreq": DefaultKubernetesNodeStatusUpdateFrequency, - "nodegraceperiod": DefaultKubernetesCtrlMgrNodeMonitorGracePeriod, - "podeviction": DefaultKubernetesCtrlMgrPodEvictionTimeout, - "routeperiod": DefaultKubernetesCtrlMgrRouteReconciliationPeriod, - "backoffretries": strconv.Itoa(DefaultKubernetesCloudProviderBackoffRetries), - "backoffjitter": strconv.FormatFloat(DefaultKubernetesCloudProviderBackoffJitter, 'f', -1, 64), - "backoffduration": strconv.Itoa(DefaultKubernetesCloudProviderBackoffDuration), - "backoffexponent": strconv.FormatFloat(DefaultKubernetesCloudProviderBackoffExponent, 'f', -1, 64), - "ratelimitqps": strconv.FormatFloat(DefaultKubernetesCloudProviderRateLimitQPS, 'f', -1, 64), - "ratelimitbucket": strconv.Itoa(DefaultKubernetesCloudProviderRateLimitBucket), - "gchighthreshold": strconv.Itoa(DefaultKubernetesGCHighThreshold), - "gclowthreshold": strconv.Itoa(DefaultKubernetesGCLowThreshold), + "dockerEngine": "1.13.*", + "dashboard": "kubernetes-dashboard-amd64:v1.8.3", + "exechealthz": "exechealthz-amd64:1.2", + "addon-resizer": "addon-resizer:1.8.1", + "heapster": "heapster-amd64:v1.5.1", + "metrics-server": "metrics-server-amd64:v0.2.1", + "kube-dns": "k8s-dns-kube-dns-amd64:1.14.8", + "addon-manager": "kube-addon-manager-amd64:v8.6", + "dnsmasq": "k8s-dns-dnsmasq-nanny-amd64:1.14.8", + "pause": "pause-amd64:3.1", + "tiller": "tiller:v2.8.1", + "rescheduler": "rescheduler:v0.3.1", + "aci-connector": "virtual-kubelet:latest", + NVIDIADevicePluginAddonName: "k8s-device-plugin:1.10", + "nodestatusfreq": DefaultKubernetesNodeStatusUpdateFrequency, + "nodegraceperiod": DefaultKubernetesCtrlMgrNodeMonitorGracePeriod, + "podeviction": DefaultKubernetesCtrlMgrPodEvictionTimeout, + "routeperiod": DefaultKubernetesCtrlMgrRouteReconciliationPeriod, + "backoffretries": strconv.Itoa(DefaultKubernetesCloudProviderBackoffRetries), + "backoffjitter": strconv.FormatFloat(DefaultKubernetesCloudProviderBackoffJitter, 'f', -1, 64), + "backoffduration": strconv.Itoa(DefaultKubernetesCloudProviderBackoffDuration), + "backoffexponent": strconv.FormatFloat(DefaultKubernetesCloudProviderBackoffExponent, 'f', -1, 64), + "ratelimitqps": strconv.FormatFloat(DefaultKubernetesCloudProviderRateLimitQPS, 'f', -1, 64), + "ratelimitbucket": strconv.Itoa(DefaultKubernetesCloudProviderRateLimitBucket), + "gchighthreshold": strconv.Itoa(DefaultKubernetesGCHighThreshold), + "gclowthreshold": strconv.Itoa(DefaultKubernetesGCLowThreshold), }, "1.10": { - "dockerEngine": "1.13.*", - "dashboard": "kubernetes-dashboard-amd64:v1.8.3", - "exechealthz": "exechealthz-amd64:1.2", - "addon-resizer": "addon-resizer:1.8.1", - "heapster": "heapster-amd64:v1.5.1", - "metrics-server": "metrics-server-amd64:v0.2.1", - "kube-dns": "k8s-dns-kube-dns-amd64:1.14.8", - "addon-manager": "kube-addon-manager-amd64:v8.6", - "dnsmasq": "k8s-dns-dnsmasq-nanny-amd64:1.14.8", - "pause": "pause-amd64:3.1", - "tiller": "tiller:v2.8.1", - "rescheduler": "rescheduler:v0.3.1", - "aci-connector": "virtual-kubelet:latest", - "cluster-autoscaler": "cluster-autoscaler:v1.2.2", - "nvidia-device-plugin": "k8s-device-plugin:1.10", - "nodestatusfreq": DefaultKubernetesNodeStatusUpdateFrequency, - "nodegraceperiod": DefaultKubernetesCtrlMgrNodeMonitorGracePeriod, - "podeviction": DefaultKubernetesCtrlMgrPodEvictionTimeout, - "routeperiod": DefaultKubernetesCtrlMgrRouteReconciliationPeriod, - "backoffretries": strconv.Itoa(DefaultKubernetesCloudProviderBackoffRetries), - "backoffjitter": strconv.FormatFloat(DefaultKubernetesCloudProviderBackoffJitter, 'f', -1, 64), - "backoffduration": strconv.Itoa(DefaultKubernetesCloudProviderBackoffDuration), - "backoffexponent": strconv.FormatFloat(DefaultKubernetesCloudProviderBackoffExponent, 'f', -1, 64), - "ratelimitqps": strconv.FormatFloat(DefaultKubernetesCloudProviderRateLimitQPS, 'f', -1, 64), - "ratelimitbucket": strconv.Itoa(DefaultKubernetesCloudProviderRateLimitBucket), - "gchighthreshold": strconv.Itoa(DefaultKubernetesGCHighThreshold), - "gclowthreshold": strconv.Itoa(DefaultKubernetesGCLowThreshold), + "dockerEngine": "1.13.*", + "dashboard": "kubernetes-dashboard-amd64:v1.8.3", + "exechealthz": "exechealthz-amd64:1.2", + "addon-resizer": "addon-resizer:1.8.1", + "heapster": "heapster-amd64:v1.5.1", + "metrics-server": "metrics-server-amd64:v0.2.1", + "kube-dns": "k8s-dns-kube-dns-amd64:1.14.8", + "addon-manager": "kube-addon-manager-amd64:v8.6", + "dnsmasq": "k8s-dns-dnsmasq-nanny-amd64:1.14.8", + "pause": "pause-amd64:3.1", + "tiller": "tiller:v2.8.1", + "rescheduler": "rescheduler:v0.3.1", + "aci-connector": "virtual-kubelet:latest", + "cluster-autoscaler": "cluster-autoscaler:v1.2.2", + NVIDIADevicePluginAddonName: "k8s-device-plugin:1.10", + "nodestatusfreq": DefaultKubernetesNodeStatusUpdateFrequency, + "nodegraceperiod": DefaultKubernetesCtrlMgrNodeMonitorGracePeriod, + "podeviction": DefaultKubernetesCtrlMgrPodEvictionTimeout, + "routeperiod": DefaultKubernetesCtrlMgrRouteReconciliationPeriod, + "backoffretries": strconv.Itoa(DefaultKubernetesCloudProviderBackoffRetries), + "backoffjitter": strconv.FormatFloat(DefaultKubernetesCloudProviderBackoffJitter, 'f', -1, 64), + "backoffduration": strconv.Itoa(DefaultKubernetesCloudProviderBackoffDuration), + "backoffexponent": strconv.FormatFloat(DefaultKubernetesCloudProviderBackoffExponent, 'f', -1, 64), + "ratelimitqps": strconv.FormatFloat(DefaultKubernetesCloudProviderRateLimitQPS, 'f', -1, 64), + "ratelimitbucket": strconv.Itoa(DefaultKubernetesCloudProviderRateLimitBucket), + "gchighthreshold": strconv.Itoa(DefaultKubernetesGCHighThreshold), + "gclowthreshold": strconv.Itoa(DefaultKubernetesGCLowThreshold), }, "1.9": { "dockerEngine": "1.13.*", @@ -244,26 +244,26 @@ func getK8sVersionComponents(version string, overrides map[string]string) map[st "addonresizer": k8sComponentVersions["1.11"]["addon-resizer"], "heapster": k8sComponentVersions["1.11"]["heapster"], DefaultMetricsServerAddonName: k8sComponentVersions["1.11"]["metrics-server"], - "dns": k8sComponentVersions["1.11"]["kube-dns"], - "addonmanager": k8sComponentVersions["1.11"]["addon-manager"], - "dnsmasq": k8sComponentVersions["1.11"]["dnsmasq"], - "pause": k8sComponentVersions["1.11"]["pause"], - DefaultTillerAddonName: k8sComponentVersions["1.11"]["tiller"], - DefaultReschedulerAddonName: k8sComponentVersions["1.11"]["rescheduler"], - DefaultACIConnectorAddonName: k8sComponentVersions["1.11"]["aci-connector"], - "nodestatusfreq": k8sComponentVersions["1.11"]["nodestatusfreq"], - "nodegraceperiod": k8sComponentVersions["1.11"]["nodegraceperiod"], - "podeviction": k8sComponentVersions["1.11"]["podeviction"], - "routeperiod": k8sComponentVersions["1.11"]["routeperiod"], - "backoffretries": k8sComponentVersions["1.11"]["backoffretries"], - "backoffjitter": k8sComponentVersions["1.11"]["backoffjitter"], - "backoffduration": k8sComponentVersions["1.11"]["backoffduration"], - "backoffexponent": k8sComponentVersions["1.11"]["backoffexponent"], - "ratelimitqps": k8sComponentVersions["1.11"]["ratelimitqps"], - "ratelimitbucket": k8sComponentVersions["1.11"]["ratelimitbucket"], - "gchighthreshold": k8sComponentVersions["1.11"]["gchighthreshold"], - "gclowthreshold": k8sComponentVersions["1.11"]["gclowthreshold"], - DefaultNVIDIADevicePluginAddonName: k8sComponentVersions["1.11"]["nvidia-device-plugin"], + "dns": k8sComponentVersions["1.11"]["kube-dns"], + "addonmanager": k8sComponentVersions["1.11"]["addon-manager"], + "dnsmasq": k8sComponentVersions["1.11"]["dnsmasq"], + "pause": k8sComponentVersions["1.11"]["pause"], + DefaultTillerAddonName: k8sComponentVersions["1.11"]["tiller"], + DefaultReschedulerAddonName: k8sComponentVersions["1.11"]["rescheduler"], + DefaultACIConnectorAddonName: k8sComponentVersions["1.11"]["aci-connector"], + "nodestatusfreq": k8sComponentVersions["1.11"]["nodestatusfreq"], + "nodegraceperiod": k8sComponentVersions["1.11"]["nodegraceperiod"], + "podeviction": k8sComponentVersions["1.11"]["podeviction"], + "routeperiod": k8sComponentVersions["1.11"]["routeperiod"], + "backoffretries": k8sComponentVersions["1.11"]["backoffretries"], + "backoffjitter": k8sComponentVersions["1.11"]["backoffjitter"], + "backoffduration": k8sComponentVersions["1.11"]["backoffduration"], + "backoffexponent": k8sComponentVersions["1.11"]["backoffexponent"], + "ratelimitqps": k8sComponentVersions["1.11"]["ratelimitqps"], + "ratelimitbucket": k8sComponentVersions["1.11"]["ratelimitbucket"], + "gchighthreshold": k8sComponentVersions["1.11"]["gchighthreshold"], + "gclowthreshold": k8sComponentVersions["1.11"]["gclowthreshold"], + NVIDIADevicePluginAddonName: k8sComponentVersions["1.11"][NVIDIADevicePluginAddonName], } case "1.10": ret = map[string]string{ @@ -276,27 +276,27 @@ func getK8sVersionComponents(version string, overrides map[string]string) map[st "addonresizer": k8sComponentVersions["1.10"]["addon-resizer"], "heapster": k8sComponentVersions["1.10"]["heapster"], DefaultMetricsServerAddonName: k8sComponentVersions["1.10"]["metrics-server"], - "dns": k8sComponentVersions["1.10"]["kube-dns"], - "addonmanager": k8sComponentVersions["1.10"]["addon-manager"], - "dnsmasq": k8sComponentVersions["1.10"]["dnsmasq"], - "pause": k8sComponentVersions["1.10"]["pause"], - DefaultTillerAddonName: k8sComponentVersions["1.10"]["tiller"], - DefaultReschedulerAddonName: k8sComponentVersions["1.10"]["rescheduler"], - DefaultACIConnectorAddonName: k8sComponentVersions["1.10"]["aci-connector"], - "nodestatusfreq": k8sComponentVersions["1.10"]["nodestatusfreq"], - "nodegraceperiod": k8sComponentVersions["1.10"]["nodegraceperiod"], - "podeviction": k8sComponentVersions["1.10"]["podeviction"], - "routeperiod": k8sComponentVersions["1.10"]["routeperiod"], - "backoffretries": k8sComponentVersions["1.10"]["backoffretries"], - "backoffjitter": k8sComponentVersions["1.10"]["backoffjitter"], - "backoffduration": k8sComponentVersions["1.10"]["backoffduration"], - "backoffexponent": k8sComponentVersions["1.10"]["backoffexponent"], - "ratelimitqps": k8sComponentVersions["1.10"]["ratelimitqps"], - "ratelimitbucket": k8sComponentVersions["1.10"]["ratelimitbucket"], - "gchighthreshold": k8sComponentVersions["1.10"]["gchighthreshold"], - "gclowthreshold": k8sComponentVersions["1.10"]["gclowthreshold"], - DefaultClusterAutoscalerAddonName: k8sComponentVersions["1.10"]["cluster-autoscaler"], - DefaultNVIDIADevicePluginAddonName: k8sComponentVersions["1.10"]["nvidia-device-plugin"], + "dns": k8sComponentVersions["1.10"]["kube-dns"], + "addonmanager": k8sComponentVersions["1.10"]["addon-manager"], + "dnsmasq": k8sComponentVersions["1.10"]["dnsmasq"], + "pause": k8sComponentVersions["1.10"]["pause"], + DefaultTillerAddonName: k8sComponentVersions["1.10"]["tiller"], + DefaultReschedulerAddonName: k8sComponentVersions["1.10"]["rescheduler"], + DefaultACIConnectorAddonName: k8sComponentVersions["1.10"]["aci-connector"], + "nodestatusfreq": k8sComponentVersions["1.10"]["nodestatusfreq"], + "nodegraceperiod": k8sComponentVersions["1.10"]["nodegraceperiod"], + "podeviction": k8sComponentVersions["1.10"]["podeviction"], + "routeperiod": k8sComponentVersions["1.10"]["routeperiod"], + "backoffretries": k8sComponentVersions["1.10"]["backoffretries"], + "backoffjitter": k8sComponentVersions["1.10"]["backoffjitter"], + "backoffduration": k8sComponentVersions["1.10"]["backoffduration"], + "backoffexponent": k8sComponentVersions["1.10"]["backoffexponent"], + "ratelimitqps": k8sComponentVersions["1.10"]["ratelimitqps"], + "ratelimitbucket": k8sComponentVersions["1.10"]["ratelimitbucket"], + "gchighthreshold": k8sComponentVersions["1.10"]["gchighthreshold"], + "gclowthreshold": k8sComponentVersions["1.10"]["gclowthreshold"], + DefaultClusterAutoscalerAddonName: k8sComponentVersions["1.10"]["cluster-autoscaler"], + NVIDIADevicePluginAddonName: k8sComponentVersions["1.10"][NVIDIADevicePluginAddonName], } case "1.9": ret = map[string]string{ diff --git a/pkg/acsengine/k8s_versions_test.go b/pkg/acsengine/k8s_versions_test.go index 199afeccab..f79082733f 100644 --- a/pkg/acsengine/k8s_versions_test.go +++ b/pkg/acsengine/k8s_versions_test.go @@ -27,6 +27,7 @@ func TestGetK8sVersionComponents(t *testing.T) { DefaultTillerAddonName: k8sComponentVersions["1.11"]["tiller"], DefaultReschedulerAddonName: k8sComponentVersions["1.11"]["rescheduler"], DefaultACIConnectorAddonName: k8sComponentVersions["1.11"]["aci-connector"], + NVIDIADevicePluginAddonName: k8sComponentVersions["1.11"][NVIDIADevicePluginAddonName], "nodestatusfreq": k8sComponentVersions["1.11"]["nodestatusfreq"], "nodegraceperiod": k8sComponentVersions["1.11"]["nodegraceperiod"], "podeviction": k8sComponentVersions["1.11"]["podeviction"], @@ -47,6 +48,49 @@ func TestGetK8sVersionComponents(t *testing.T) { } } + oneDotTenDotZero := getK8sVersionComponents("1.10.0", nil) + if oneDotTenDotZero == nil { + t.Fatalf("getK8sVersionComponents() should not return nil for valid version") + } + expected = map[string]string{ + "hyperkube": "hyperkube-amd64:v1.10.0", + "ccm": "cloud-controller-manager-amd64:v1.10.0", + "windowszip": "v1.10.0-1int.zip", + "dockerEngineVersion": k8sComponentVersions["1.10"]["dockerEngine"], + DefaultDashboardAddonName: k8sComponentVersions["1.10"]["dashboard"], + "exechealthz": k8sComponentVersions["1.10"]["exechealthz"], + "addonresizer": k8sComponentVersions["1.10"]["addon-resizer"], + "heapster": k8sComponentVersions["1.10"]["heapster"], + DefaultMetricsServerAddonName: k8sComponentVersions["1.10"]["metrics-server"], + "dns": k8sComponentVersions["1.10"]["kube-dns"], + "addonmanager": k8sComponentVersions["1.10"]["addon-manager"], + "dnsmasq": k8sComponentVersions["1.10"]["dnsmasq"], + "pause": k8sComponentVersions["1.10"]["pause"], + DefaultTillerAddonName: k8sComponentVersions["1.10"]["tiller"], + DefaultReschedulerAddonName: k8sComponentVersions["1.10"]["rescheduler"], + DefaultACIConnectorAddonName: k8sComponentVersions["1.10"]["aci-connector"], + NVIDIADevicePluginAddonName: k8sComponentVersions["1.10"][NVIDIADevicePluginAddonName], + DefaultClusterAutoscalerAddonName: k8sComponentVersions["1.10"][DefaultClusterAutoscalerAddonName], + "nodestatusfreq": k8sComponentVersions["1.10"]["nodestatusfreq"], + "nodegraceperiod": k8sComponentVersions["1.10"]["nodegraceperiod"], + "podeviction": k8sComponentVersions["1.10"]["podeviction"], + "routeperiod": k8sComponentVersions["1.10"]["routeperiod"], + "backoffretries": k8sComponentVersions["1.10"]["backoffretries"], + "backoffjitter": k8sComponentVersions["1.10"]["backoffjitter"], + "backoffduration": k8sComponentVersions["1.10"]["backoffduration"], + "backoffexponent": k8sComponentVersions["1.10"]["backoffexponent"], + "ratelimitqps": k8sComponentVersions["1.10"]["ratelimitqps"], + "ratelimitbucket": k8sComponentVersions["1.10"]["ratelimitbucket"], + "gchighthreshold": k8sComponentVersions["1.10"]["gchighthreshold"], + "gclowthreshold": k8sComponentVersions["1.10"]["gclowthreshold"], + } + + for k, v := range oneDotTenDotZero { + if expected[k] != v { + t.Fatalf("getK8sVersionComponents() returned an unexpected map[string]string value for k8s 1.10.0: %s = %s", k, oneDotTenDotZero[k]) + } + } + oneDotNineDotThree := getK8sVersionComponents("1.9.3", nil) if oneDotNineDotThree == nil { t.Fatalf("getK8sVersionComponents() should not return nil for valid version") diff --git a/pkg/acsengine/params_k8s.go b/pkg/acsengine/params_k8s.go index 718b76d371..941c6669c3 100644 --- a/pkg/acsengine/params_k8s.go +++ b/pkg/acsengine/params_k8s.go @@ -195,13 +195,13 @@ func assignKubernetesParameters(properties *api.Properties, parametersMap params addValue(parametersMap, "kubernetesMetricsServerSpec", cloudSpecConfig.KubernetesSpecConfig.KubernetesImageBase+KubeConfigs[k8sVersion][DefaultMetricsServerAddonName]) } } - nvidiaDevicePluginAddon := getAddonByName(properties.OrchestratorProfile.KubernetesConfig.Addons, DefaultNVIDIADevicePluginAddonName) - c = getAddonContainersIndexByName(nvidiaDevicePluginAddon.Containers, DefaultNVIDIADevicePluginAddonName) + nvidiaDevicePluginAddon := getAddonByName(properties.OrchestratorProfile.KubernetesConfig.Addons, NVIDIADevicePluginAddonName) + c = getAddonContainersIndexByName(nvidiaDevicePluginAddon.Containers, NVIDIADevicePluginAddonName) if c > -1 { if nvidiaDevicePluginAddon.Containers[c].Image != "" { addValue(parametersMap, "kubernetesNVIDIADevicePluginSpec", nvidiaDevicePluginAddon.Containers[c].Image) } else { - addValue(parametersMap, "kubernetesNVIDIADevicePluginSpec", cloudSpecConfig.KubernetesSpecConfig.NVIDIAImageBase+KubeConfigs[k8sVersion][DefaultNVIDIADevicePluginAddonName]) + addValue(parametersMap, "kubernetesNVIDIADevicePluginSpec", cloudSpecConfig.KubernetesSpecConfig.NVIDIAImageBase+KubeConfigs[k8sVersion][NVIDIADevicePluginAddonName]) } } addValue(parametersMap, "kubernetesKubeDNSSpec", cloudSpecConfig.KubernetesSpecConfig.KubernetesImageBase+KubeConfigs[k8sVersion]["dns"]) diff --git a/pkg/acsengine/template_generator.go b/pkg/acsengine/template_generator.go index 7ec9230a2f..b2cbe1d9f3 100644 --- a/pkg/acsengine/template_generator.go +++ b/pkg/acsengine/template_generator.go @@ -749,8 +749,8 @@ func (t *TemplateGenerator) getTemplateFuncMap(cs *api.ContainerService) templat rC := getAddonContainersIndexByName(reschedulerAddon.Containers, DefaultReschedulerAddonName) metricsServerAddon := getAddonByName(cs.Properties.OrchestratorProfile.KubernetesConfig.Addons, DefaultMetricsServerAddonName) mC := getAddonContainersIndexByName(metricsServerAddon.Containers, DefaultMetricsServerAddonName) - nvidiaDevicePluginAddon := getAddonByName(cs.Properties.OrchestratorProfile.KubernetesConfig.Addons, DefaultNVIDIADevicePluginAddonName) - nC := getAddonContainersIndexByName(nvidiaDevicePluginAddon.Containers, DefaultNVIDIADevicePluginAddonName) + nvidiaDevicePluginAddon := getAddonByName(cs.Properties.OrchestratorProfile.KubernetesConfig.Addons, NVIDIADevicePluginAddonName) + nC := getAddonContainersIndexByName(nvidiaDevicePluginAddon.Containers, NVIDIADevicePluginAddonName) switch attr { case "kubernetesHyperkubeSpec": val = cs.Properties.OrchestratorProfile.KubernetesConfig.KubernetesImageBase + KubeConfigs[k8sVersion]["hyperkube"] @@ -956,7 +956,7 @@ func (t *TemplateGenerator) getTemplateFuncMap(cs *api.ContainerService) templat val = nvidiaDevicePluginAddon.Containers[nC].Image } } else { - val = cloudSpecConfig.KubernetesSpecConfig.NVIDIAImageBase + KubeConfigs[k8sVersion][DefaultNVIDIADevicePluginAddonName] + val = cloudSpecConfig.KubernetesSpecConfig.NVIDIAImageBase + KubeConfigs[k8sVersion][NVIDIADevicePluginAddonName] } case "kubernetesReschedulerSpec": if rC > -1 { diff --git a/pkg/api/const.go b/pkg/api/const.go index 017963a8bd..89ab1370e1 100644 --- a/pkg/api/const.go +++ b/pkg/api/const.go @@ -117,8 +117,8 @@ const ( DefaultReschedulerAddonName = "rescheduler" // DefaultMetricsServerAddonName is the name of the kubernetes metrics server addon deployment DefaultMetricsServerAddonName = "metrics-server" - // DefaultNVIDIADevicePluginAddonName is the name of the NVIDIA device plugin addon deployment - DefaultNVIDIADevicePluginAddonName = "nvidia-device-plugin" + // NVIDIADevicePluginAddonName is the name of the NVIDIA device plugin addon deployment + NVIDIADevicePluginAddonName = "nvidia-device-plugin" // DefaultPrivateClusterEnabled determines the acs-engine provided default for enabling kubernetes Private Cluster DefaultPrivateClusterEnabled = false // NetworkPolicyAzure is the string expression for the deprecated NetworkPolicy usage pattern "azure" diff --git a/pkg/api/types.go b/pkg/api/types.go index dbf587c4cf..3d4daf189a 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -925,7 +925,7 @@ func (p *Properties) IsNVIDIADevicePluginEnabled() bool { k := p.OrchestratorProfile.KubernetesConfig o := p.OrchestratorProfile for i := range k.Addons { - if k.Addons[i].Name == DefaultNVIDIADevicePluginAddonName { + if k.Addons[i].Name == NVIDIADevicePluginAddonName { nvidiaDevicePluginAddon = k.Addons[i] } } diff --git a/pkg/api/types_test.go b/pkg/api/types_test.go index 3d81352fde..cd825ff9c2 100644 --- a/pkg/api/types_test.go +++ b/pkg/api/types_test.go @@ -238,7 +238,7 @@ func TestIsNVIDIADevicePluginEnabled(t *testing.T) { c := p.OrchestratorProfile.KubernetesConfig c.Addons = []KubernetesAddon{ { - Name: DefaultNVIDIADevicePluginAddonName, + Name: NVIDIADevicePluginAddonName, Enabled: &b, }, } From e91bc9351a009afa9cc5a4827a334866db579a47 Mon Sep 17 00:00:00 2001 From: Jack Francis Date: Thu, 7 Jun 2018 13:51:46 -0700 Subject: [PATCH 11/18] add nvidia.com/gpu to addons --- .../addons/kubernetesmasteraddons-heapster-deployment.yaml | 4 ++++ .../addons/kubernetesmasteraddons-kube-dns-deployment.yaml | 4 ++++ ...bernetesmasteraddons-kubernetes-dashboard-deployment.yaml | 5 +++++ .../kubernetesmasteraddons-metrics-server-deployment.yaml | 5 +++++ .../k8s/addons/kubernetesmasteraddons-tiller-deployment.yaml | 5 +++++ 5 files changed, 23 insertions(+) diff --git a/parts/k8s/addons/kubernetesmasteraddons-heapster-deployment.yaml b/parts/k8s/addons/kubernetesmasteraddons-heapster-deployment.yaml index 3672ba9dbe..71a3a40015 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-heapster-deployment.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-heapster-deployment.yaml @@ -167,5 +167,9 @@ spec: tolerations: - key: "CriticalAddonsOnly" operator: "Exists" + - key: nvidia.com/gpu + operator: Equal + value: "true" + effect: NoSchedule nodeSelector: beta.kubernetes.io/os: linux diff --git a/parts/k8s/addons/kubernetesmasteraddons-kube-dns-deployment.yaml b/parts/k8s/addons/kubernetesmasteraddons-kube-dns-deployment.yaml index 5f736b1589..9d98291c25 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-kube-dns-deployment.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-kube-dns-deployment.yaml @@ -67,6 +67,10 @@ spec: tolerations: - key: CriticalAddonsOnly operator: Exists + - key: nvidia.com/gpu + operator: Equal + value: "true" + effect: NoSchedule volumes: - name: kube-dns-config configMap: diff --git a/parts/k8s/addons/kubernetesmasteraddons-kubernetes-dashboard-deployment.yaml b/parts/k8s/addons/kubernetesmasteraddons-kubernetes-dashboard-deployment.yaml index 7be73caffa..850b5f49e1 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-kubernetes-dashboard-deployment.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-kubernetes-dashboard-deployment.yaml @@ -94,6 +94,11 @@ spec: labels: k8s-app: kubernetes-dashboard spec: + tolerations: + - key: nvidia.com/gpu + operator: Equal + value: "true" + effect: NoSchedule containers: - args: - --auto-generate-certificates diff --git a/parts/k8s/addons/kubernetesmasteraddons-metrics-server-deployment.yaml b/parts/k8s/addons/kubernetesmasteraddons-metrics-server-deployment.yaml index 4545da8c6d..d29e50e045 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-metrics-server-deployment.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-metrics-server-deployment.yaml @@ -119,6 +119,11 @@ spec: labels: k8s-app: metrics-server spec: + tolerations: + - key: nvidia.com/gpu + operator: Equal + value: "true" + effect: NoSchedule serviceAccountName: metrics-server containers: - name: metrics-server diff --git a/parts/k8s/addons/kubernetesmasteraddons-tiller-deployment.yaml b/parts/k8s/addons/kubernetesmasteraddons-tiller-deployment.yaml index 64831fad10..6e630049a1 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-tiller-deployment.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-tiller-deployment.yaml @@ -60,6 +60,11 @@ spec: app: helm name: tiller spec: + tolerations: + - key: nvidia.com/gpu + operator: Equal + value: "true" + effect: NoSchedule serviceAccountName: tiller containers: - env: From bbc2c7ae9340fdfe7d218e5170db40a2a79cf8e3 Mon Sep 17 00:00:00 2001 From: Lachlan Evenson Date: Tue, 26 Jun 2018 20:33:32 -0600 Subject: [PATCH 12/18] Revert "add nvidia.com/gpu to addons" This reverts commit e91bc9351a009afa9cc5a4827a334866db579a47. --- .../addons/kubernetesmasteraddons-heapster-deployment.yaml | 4 ---- .../addons/kubernetesmasteraddons-kube-dns-deployment.yaml | 4 ---- ...bernetesmasteraddons-kubernetes-dashboard-deployment.yaml | 5 ----- .../kubernetesmasteraddons-metrics-server-deployment.yaml | 5 ----- .../k8s/addons/kubernetesmasteraddons-tiller-deployment.yaml | 5 ----- 5 files changed, 23 deletions(-) diff --git a/parts/k8s/addons/kubernetesmasteraddons-heapster-deployment.yaml b/parts/k8s/addons/kubernetesmasteraddons-heapster-deployment.yaml index 71a3a40015..3672ba9dbe 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-heapster-deployment.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-heapster-deployment.yaml @@ -167,9 +167,5 @@ spec: tolerations: - key: "CriticalAddonsOnly" operator: "Exists" - - key: nvidia.com/gpu - operator: Equal - value: "true" - effect: NoSchedule nodeSelector: beta.kubernetes.io/os: linux diff --git a/parts/k8s/addons/kubernetesmasteraddons-kube-dns-deployment.yaml b/parts/k8s/addons/kubernetesmasteraddons-kube-dns-deployment.yaml index 9d98291c25..5f736b1589 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-kube-dns-deployment.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-kube-dns-deployment.yaml @@ -67,10 +67,6 @@ spec: tolerations: - key: CriticalAddonsOnly operator: Exists - - key: nvidia.com/gpu - operator: Equal - value: "true" - effect: NoSchedule volumes: - name: kube-dns-config configMap: diff --git a/parts/k8s/addons/kubernetesmasteraddons-kubernetes-dashboard-deployment.yaml b/parts/k8s/addons/kubernetesmasteraddons-kubernetes-dashboard-deployment.yaml index 850b5f49e1..7be73caffa 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-kubernetes-dashboard-deployment.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-kubernetes-dashboard-deployment.yaml @@ -94,11 +94,6 @@ spec: labels: k8s-app: kubernetes-dashboard spec: - tolerations: - - key: nvidia.com/gpu - operator: Equal - value: "true" - effect: NoSchedule containers: - args: - --auto-generate-certificates diff --git a/parts/k8s/addons/kubernetesmasteraddons-metrics-server-deployment.yaml b/parts/k8s/addons/kubernetesmasteraddons-metrics-server-deployment.yaml index d29e50e045..4545da8c6d 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-metrics-server-deployment.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-metrics-server-deployment.yaml @@ -119,11 +119,6 @@ spec: labels: k8s-app: metrics-server spec: - tolerations: - - key: nvidia.com/gpu - operator: Equal - value: "true" - effect: NoSchedule serviceAccountName: metrics-server containers: - name: metrics-server diff --git a/parts/k8s/addons/kubernetesmasteraddons-tiller-deployment.yaml b/parts/k8s/addons/kubernetesmasteraddons-tiller-deployment.yaml index 6e630049a1..64831fad10 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-tiller-deployment.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-tiller-deployment.yaml @@ -60,11 +60,6 @@ spec: app: helm name: tiller spec: - tolerations: - - key: nvidia.com/gpu - operator: Equal - value: "true" - effect: NoSchedule serviceAccountName: tiller containers: - env: From 2ea40970015daeec32526ec40fb9d1dcd2d5fb6b Mon Sep 17 00:00:00 2001 From: Lachlan Evenson Date: Tue, 26 Jun 2018 20:36:32 -0600 Subject: [PATCH 13/18] remove register node with taints --- parts/k8s/kubernetesagentcustomdata.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/parts/k8s/kubernetesagentcustomdata.yml b/parts/k8s/kubernetesagentcustomdata.yml index e8fc7c7ab1..f167eb9a52 100644 --- a/parts/k8s/kubernetesagentcustomdata.yml +++ b/parts/k8s/kubernetesagentcustomdata.yml @@ -137,11 +137,6 @@ write_files: KUBELET_IMAGE={{WrapAsVariable "kubernetesHyperkubeSpec"}} KUBELET_REGISTER_SCHEDULABLE=true KUBELET_NODE_LABELS={{GetAgentKubernetesLabels . "',variables('labelResourceGroup'),'"}} -{{if IsNSeriesSKU .}} - {{if IsNVIDIADevicePluginEnabled}} - KUBELET_REGISTER_WITH_TAINTS=--register-with-taints={{WrapAsVariable "registerWithGpuTaints"}} - {{end}} -{{end}} AGENT_ARTIFACTS_CONFIG_PLACEHOLDER From 6b7af96dc66b6ef5db0f616a479ec9430f865405 Mon Sep 17 00:00:00 2001 From: Lachlan Evenson Date: Tue, 26 Jun 2018 20:37:37 -0600 Subject: [PATCH 14/18] Revert "Add toleration to run kube-proxy on tainted gpu nodes" This reverts commit c9e6dc12d0082b1ce9e16f251bc65e33687ee954. --- .../addons/kubernetesmasteraddons-kube-proxy-daemonset.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/parts/k8s/addons/kubernetesmasteraddons-kube-proxy-daemonset.yaml b/parts/k8s/addons/kubernetesmasteraddons-kube-proxy-daemonset.yaml index b07dddf6f2..1a2adfc837 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-kube-proxy-daemonset.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-kube-proxy-daemonset.yaml @@ -19,10 +19,6 @@ spec: operator: Equal value: "true" effect: NoSchedule - - key: nvidia.com/gpu - operator: Equal - value: "true" - effect: NoSchedule containers: - command: - "/hyperkube" From 478f39f548cf1aed144e9e20fcf2d2b0eb10c01a Mon Sep 17 00:00:00 2001 From: Lachlan Evenson Date: Wed, 27 Jun 2018 09:51:13 -0700 Subject: [PATCH 15/18] fixed rebase errors --- pkg/acsengine/k8s_versions.go | 25 +++---------------------- pkg/acsengine/params_k8s.go | 16 ++++++++-------- 2 files changed, 11 insertions(+), 30 deletions(-) diff --git a/pkg/acsengine/k8s_versions.go b/pkg/acsengine/k8s_versions.go index f718db4d86..482c06fbb9 100644 --- a/pkg/acsengine/k8s_versions.go +++ b/pkg/acsengine/k8s_versions.go @@ -25,7 +25,7 @@ var k8sComponentVersions = map[string]map[string]string{ ContainerMonitoringAddonName: "oms:ciprod05082018", AzureCNINetworkMonitoringAddonName: "networkmonitor:v0.0.4", "cluster-autoscaler": "cluster-autoscaler:v1.3.0", - NVIDIADevicePluginAddonName: "k8s-device-plugin:1.10", + NVIDIADevicePluginAddonName: "k8s-device-plugin:1.10", "nodestatusfreq": DefaultKubernetesNodeStatusUpdateFrequency, "nodegraceperiod": DefaultKubernetesCtrlMgrNodeMonitorGracePeriod, "podeviction": DefaultKubernetesCtrlMgrPodEvictionTimeout, @@ -256,25 +256,6 @@ func getK8sVersionComponents(version string, overrides map[string]string) map[st "addonresizer": k8sComponentVersions["1.11"]["addon-resizer"], "heapster": k8sComponentVersions["1.11"]["heapster"], DefaultMetricsServerAddonName: k8sComponentVersions["1.11"]["metrics-server"], - "dns": k8sComponentVersions["1.11"]["kube-dns"], - "addonmanager": k8sComponentVersions["1.11"]["addon-manager"], - "dnsmasq": k8sComponentVersions["1.11"]["dnsmasq"], - "pause": k8sComponentVersions["1.11"]["pause"], - DefaultTillerAddonName: k8sComponentVersions["1.11"]["tiller"], - DefaultReschedulerAddonName: k8sComponentVersions["1.11"]["rescheduler"], - DefaultACIConnectorAddonName: k8sComponentVersions["1.11"]["aci-connector"], - "nodestatusfreq": k8sComponentVersions["1.11"]["nodestatusfreq"], - "nodegraceperiod": k8sComponentVersions["1.11"]["nodegraceperiod"], - "podeviction": k8sComponentVersions["1.11"]["podeviction"], - "routeperiod": k8sComponentVersions["1.11"]["routeperiod"], - "backoffretries": k8sComponentVersions["1.11"]["backoffretries"], - "backoffjitter": k8sComponentVersions["1.11"]["backoffjitter"], - "backoffduration": k8sComponentVersions["1.11"]["backoffduration"], - "backoffexponent": k8sComponentVersions["1.11"]["backoffexponent"], - "ratelimitqps": k8sComponentVersions["1.11"]["ratelimitqps"], - "ratelimitbucket": k8sComponentVersions["1.11"]["ratelimitbucket"], - "gchighthreshold": k8sComponentVersions["1.11"]["gchighthreshold"], - "gclowthreshold": k8sComponentVersions["1.11"]["gclowthreshold"], "dns": k8sComponentVersions["1.11"]["kube-dns"], "addonmanager": k8sComponentVersions["1.11"]["addon-manager"], "dnsmasq": k8sComponentVersions["1.11"]["dnsmasq"], @@ -297,7 +278,7 @@ func getK8sVersionComponents(version string, overrides map[string]string) map[st "ratelimitbucket": k8sComponentVersions["1.11"]["ratelimitbucket"], "gchighthreshold": k8sComponentVersions["1.11"]["gchighthreshold"], "gclowthreshold": k8sComponentVersions["1.11"]["gclowthreshold"], - NVIDIADevicePluginAddonName: k8sComponentVersions["1.11"][NVIDIADevicePluginAddonName], + NVIDIADevicePluginAddonName: k8sComponentVersions["1.11"][NVIDIADevicePluginAddonName], } case "1.10": ret = map[string]string{ @@ -332,7 +313,7 @@ func getK8sVersionComponents(version string, overrides map[string]string) map[st "gchighthreshold": k8sComponentVersions["1.10"]["gchighthreshold"], "gclowthreshold": k8sComponentVersions["1.10"]["gclowthreshold"], DefaultClusterAutoscalerAddonName: k8sComponentVersions["1.10"]["cluster-autoscaler"], - NVIDIADevicePluginAddonName: k8sComponentVersions["1.10"][NVIDIADevicePluginAddonName], + NVIDIADevicePluginAddonName: k8sComponentVersions["1.10"][NVIDIADevicePluginAddonName], } case "1.9": ret = map[string]string{ diff --git a/pkg/acsengine/params_k8s.go b/pkg/acsengine/params_k8s.go index e2ec12d0cf..2dfdae469f 100644 --- a/pkg/acsengine/params_k8s.go +++ b/pkg/acsengine/params_k8s.go @@ -124,14 +124,14 @@ func assignKubernetesParameters(properties *api.Properties, parametersMap params addValue(parametersMap, "kubernetesMetricsServerSpec", cloudSpecConfig.KubernetesSpecConfig.KubernetesImageBase+KubeConfigs[k8sVersion][DefaultMetricsServerAddonName]) } } - nvidiaDevicePluginAddon := getAddonByName(properties.OrchestratorProfile.KubernetesConfig.Addons, NVIDIADevicePluginAddonName) - c = getAddonContainersIndexByName(nvidiaDevicePluginAddon.Containers, NVIDIADevicePluginAddonName) - if c > -1 { - if nvidiaDeviceginAddon.Containers[c].Image != "" { - addValue(parametersMap, "kubernetesNVIDIADevicePluginSpec", nvidiaDevicePluginAddon.Containers[c].Image) - } else { - addValue(parametersMap, "kubernetesNVIDIADevicePluginSpec", cloudSpecConfig.KubernetesSpecConfig.NVIDIAImageBase+KubeConfigs[k8sVersion][NVIDIADevicePluginAddonName]) - } + nvidiaDevicePluginAddon := getAddonByName(properties.OrchestratorProfile.KubernetesConfig.Addons, NVIDIADevicePluginAddonName) + c = getAddonContainersIndexByName(nvidiaDevicePluginAddon.Containers, NVIDIADevicePluginAddonName) + if c > -1 { + if nvidiaDevicePluginAddon.Containers[c].Image != "" { + addValue(parametersMap, "kubernetesNVIDIADevicePluginSpec", nvidiaDevicePluginAddon.Containers[c].Image) + } else { + addValue(parametersMap, "kubernetesNVIDIADevicePluginSpec", cloudSpecConfig.KubernetesSpecConfig.NVIDIAImageBase+KubeConfigs[k8sVersion][NVIDIADevicePluginAddonName]) + } } containerMonitoringAddon := getAddonByName(properties.OrchestratorProfile.KubernetesConfig.Addons, ContainerMonitoringAddonName) c = getAddonContainersIndexByName(containerMonitoringAddon.Containers, "omsagent") From 474d0e61bef76bb264241e71a7668bf96988b15c Mon Sep 17 00:00:00 2001 From: Lachlan Evenson Date: Wed, 27 Jun 2018 10:49:27 -0700 Subject: [PATCH 16/18] fix formatting errors --- pkg/acsengine/k8s_versions_test.go | 2 +- pkg/api/types_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/acsengine/k8s_versions_test.go b/pkg/acsengine/k8s_versions_test.go index 59ba6320dc..f1e8db0e41 100644 --- a/pkg/acsengine/k8s_versions_test.go +++ b/pkg/acsengine/k8s_versions_test.go @@ -30,7 +30,7 @@ func TestGetK8sVersionComponents(t *testing.T) { ContainerMonitoringAddonName: k8sComponentVersions["1.11"][ContainerMonitoringAddonName], AzureCNINetworkMonitoringAddonName: k8sComponentVersions["1.11"][AzureCNINetworkMonitoringAddonName], DefaultClusterAutoscalerAddonName: k8sComponentVersions["1.11"]["cluster-autoscaler"], - NVIDIADevicePluginAddonName: k8sComponentVersions["1.11"][NVIDIADevicePluginAddonName], + NVIDIADevicePluginAddonName: k8sComponentVersions["1.11"][NVIDIADevicePluginAddonName], "nodestatusfreq": k8sComponentVersions["1.11"]["nodestatusfreq"], "nodegraceperiod": k8sComponentVersions["1.11"]["nodegraceperiod"], "podeviction": k8sComponentVersions["1.11"]["podeviction"], diff --git a/pkg/api/types_test.go b/pkg/api/types_test.go index 4d7c7330d5..5626976f0f 100644 --- a/pkg/api/types_test.go +++ b/pkg/api/types_test.go @@ -899,7 +899,7 @@ func TestIsNVIDIADevicePluginEnabled(t *testing.T) { p.OrchestratorProfile.KubernetesConfig.Addons = []KubernetesAddon{ { Name: NVIDIADevicePluginAddonName, - Enabled: &b, + Enabled: helpers.PointerToBool(false), }, } From cb1486e8313814d71b484bdcddcdea350d4291cc Mon Sep 17 00:00:00 2001 From: Lachlan Evenson Date: Wed, 27 Jun 2018 11:06:09 -0700 Subject: [PATCH 17/18] fixed tests --- pkg/acsengine/k8s_versions_test.go | 46 ++++++++++++++++-------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/pkg/acsengine/k8s_versions_test.go b/pkg/acsengine/k8s_versions_test.go index f1e8db0e41..a63c2a4dd3 100644 --- a/pkg/acsengine/k8s_versions_test.go +++ b/pkg/acsengine/k8s_versions_test.go @@ -65,27 +65,29 @@ func TestGetK8sVersionComponents(t *testing.T) { "addonresizer": k8sComponentVersions["1.10"]["addon-resizer"], "heapster": k8sComponentVersions["1.10"]["heapster"], DefaultMetricsServerAddonName: k8sComponentVersions["1.10"]["metrics-server"], - "dns": k8sComponentVersions["1.10"]["kube-dns"], - "addonmanager": k8sComponentVersions["1.10"]["addon-manager"], - "dnsmasq": k8sComponentVersions["1.10"]["dnsmasq"], - "pause": k8sComponentVersions["1.10"]["pause"], - DefaultTillerAddonName: k8sComponentVersions["1.10"]["tiller"], - DefaultReschedulerAddonName: k8sComponentVersions["1.10"]["rescheduler"], - DefaultACIConnectorAddonName: k8sComponentVersions["1.10"]["aci-connector"], - NVIDIADevicePluginAddonName: k8sComponentVersions["1.10"][NVIDIADevicePluginAddonName], - DefaultClusterAutoscalerAddonName: k8sComponentVersions["1.10"][DefaultClusterAutoscalerAddonName], - "nodestatusfreq": k8sComponentVersions["1.10"]["nodestatusfreq"], - "nodegraceperiod": k8sComponentVersions["1.10"]["nodegraceperiod"], - "podeviction": k8sComponentVersions["1.10"]["podeviction"], - "routeperiod": k8sComponentVersions["1.10"]["routeperiod"], - "backoffretries": k8sComponentVersions["1.10"]["backoffretries"], - "backoffjitter": k8sComponentVersions["1.10"]["backoffjitter"], - "backoffduration": k8sComponentVersions["1.10"]["backoffduration"], - "backoffexponent": k8sComponentVersions["1.10"]["backoffexponent"], - "ratelimitqps": k8sComponentVersions["1.10"]["ratelimitqps"], - "ratelimitbucket": k8sComponentVersions["1.10"]["ratelimitbucket"], - "gchighthreshold": k8sComponentVersions["1.10"]["gchighthreshold"], - "gclowthreshold": k8sComponentVersions["1.10"]["gclowthreshold"], + "dns": k8sComponentVersions["1.10"]["kube-dns"], + "addonmanager": k8sComponentVersions["1.10"]["addon-manager"], + "dnsmasq": k8sComponentVersions["1.10"]["dnsmasq"], + "pause": k8sComponentVersions["1.10"]["pause"], + DefaultTillerAddonName: k8sComponentVersions["1.10"]["tiller"], + DefaultReschedulerAddonName: k8sComponentVersions["1.10"]["rescheduler"], + DefaultACIConnectorAddonName: k8sComponentVersions["1.10"]["aci-connector"], + ContainerMonitoringAddonName: k8sComponentVersions["1.10"][ContainerMonitoringAddonName], + AzureCNINetworkMonitoringAddonName: k8sComponentVersions["1.10"][AzureCNINetworkMonitoringAddonName], + DefaultClusterAutoscalerAddonName: k8sComponentVersions["1.10"]["cluster-autoscaler"], + NVIDIADevicePluginAddonName: k8sComponentVersions["1.10"][NVIDIADevicePluginAddonName], + "nodestatusfreq": k8sComponentVersions["1.10"]["nodestatusfreq"], + "nodegraceperiod": k8sComponentVersions["1.10"]["nodegraceperiod"], + "podeviction": k8sComponentVersions["1.10"]["podeviction"], + "routeperiod": k8sComponentVersions["1.10"]["routeperiod"], + "backoffretries": k8sComponentVersions["1.10"]["backoffretries"], + "backoffjitter": k8sComponentVersions["1.10"]["backoffjitter"], + "backoffduration": k8sComponentVersions["1.10"]["backoffduration"], + "backoffexponent": k8sComponentVersions["1.10"]["backoffexponent"], + "ratelimitqps": k8sComponentVersions["1.10"]["ratelimitqps"], + "ratelimitbucket": k8sComponentVersions["1.10"]["ratelimitbucket"], + "gchighthreshold": k8sComponentVersions["1.10"]["gchighthreshold"], + "gclowthreshold": k8sComponentVersions["1.10"]["gclowthreshold"], } for k, v := range oneDotTenDotZero { @@ -115,7 +117,7 @@ func TestGetK8sVersionComponents(t *testing.T) { DefaultTillerAddonName: k8sComponentVersions["1.9"]["tiller"], DefaultReschedulerAddonName: k8sComponentVersions["1.9"]["rescheduler"], DefaultACIConnectorAddonName: k8sComponentVersions["1.9"]["aci-connector"], - ContainerMonitoringAddonName: k8sComponentVersions["1.11"][ContainerMonitoringAddonName], + ContainerMonitoringAddonName: k8sComponentVersions["1.9"][ContainerMonitoringAddonName], AzureCNINetworkMonitoringAddonName: k8sComponentVersions["1.9"][AzureCNINetworkMonitoringAddonName], DefaultClusterAutoscalerAddonName: k8sComponentVersions["1.9"]["cluster-autoscaler"], "nodestatusfreq": k8sComponentVersions["1.9"]["nodestatusfreq"], From be6ebd71ef325a981eeb1974b79f854162e5cc25 Mon Sep 17 00:00:00 2001 From: Jack Francis Date: Wed, 27 Jun 2018 12:10:40 -0700 Subject: [PATCH 18/18] actually check for nvidia addon enabled --- pkg/api/vlabs/validate.go | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/pkg/api/vlabs/validate.go b/pkg/api/vlabs/validate.go index 59395c5b13..baeca617fd 100644 --- a/pkg/api/vlabs/validate.go +++ b/pkg/api/vlabs/validate.go @@ -427,24 +427,26 @@ func (a *Properties) validateAddons() error { return fmt.Errorf("Cluster Autoscaler add-on can only be used with VirtualMachineScaleSets. Please specify \"availabilityProfile\": \"%s\"", VirtualMachineScaleSets) } case "nvidia-device-plugin": - version := common.RationalizeReleaseAndVersion( - a.OrchestratorProfile.OrchestratorType, - a.OrchestratorProfile.OrchestratorRelease, - a.OrchestratorProfile.OrchestratorVersion, - false) - if version == "" { - return fmt.Errorf("the following user supplied OrchestratorProfile configuration is not supported: OrchestratorType: %s, OrchestratorRelease: %s, OrchestratorVersion: %s. Please check supported Release or Version for this build of acs-engine", a.OrchestratorProfile.OrchestratorType, a.OrchestratorProfile.OrchestratorRelease, a.OrchestratorProfile.OrchestratorVersion) - } - sv, err := semver.Make(version) - if err != nil { - return fmt.Errorf("could not validate version %s", version) - } - minVersion, err := semver.Make("1.10.0") - if err != nil { - return fmt.Errorf("could not validate version") - } - if isNSeriesSKU && sv.LT(minVersion) { - return fmt.Errorf("NVIDIA Device Plugin add-on can only be used Kubernetes 1.10 or above. Please specify \"orchestratorRelease\": \"1.10\"") + if helpers.IsTrueBoolPointer(addon.Enabled) { + version := common.RationalizeReleaseAndVersion( + a.OrchestratorProfile.OrchestratorType, + a.OrchestratorProfile.OrchestratorRelease, + a.OrchestratorProfile.OrchestratorVersion, + false) + if version == "" { + return fmt.Errorf("the following user supplied OrchestratorProfile configuration is not supported: OrchestratorType: %s, OrchestratorRelease: %s, OrchestratorVersion: %s. Please check supported Release or Version for this build of acs-engine", a.OrchestratorProfile.OrchestratorType, a.OrchestratorProfile.OrchestratorRelease, a.OrchestratorProfile.OrchestratorVersion) + } + sv, err := semver.Make(version) + if err != nil { + return fmt.Errorf("could not validate version %s", version) + } + minVersion, err := semver.Make("1.10.0") + if err != nil { + return fmt.Errorf("could not validate version") + } + if isNSeriesSKU && sv.LT(minVersion) { + return fmt.Errorf("NVIDIA Device Plugin add-on can only be used Kubernetes 1.10 or above. Please specify \"orchestratorRelease\": \"1.10\"") + } } } }