From 1064066f74868bb35d43869fa4e7f26804974f58 Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Wed, 27 May 2026 15:44:21 +0300 Subject: [PATCH 1/4] Require explicit spec.mcr.channel; fix Ubuntu apt-get noninteractive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove the default channel from MCRConfig.SetDefaults() — channel is now a required field. An absent or empty spec.mcr.channel produces a clear fatal error at parse time, consistent with how spec.mke.version is handled. - Add validation in ClusterSpec.UnmarshalYAML: if MCR channel is empty after unmarshal (mcr block absent or channel field omitted), return an actionable error directing the user to set e.g. channel: stable-29.4. - Set DEBIAN_FRONTEND=noninteractive on the apt-get update call in the Ubuntu MCR installer (InstallPackage from rig already sets it for install calls). - Add policy-rc.d comment documenting the deferred dpkg post-install concern. - Update all cluster_test.go fixtures to include mcr.channel: stable. - Add TestMissingMCRChannelFails and TestMCRConfig_ChannelRequired tests. PRODENG-3393 --- PRODENG-3393-plan.md | 87 ++++++++++++++++++++ pkg/configurer/ubuntu/ubuntu.go | 10 ++- pkg/product/common/config/mcr_config.go | 5 +- pkg/product/common/config/mcr_config_test.go | 13 ++- pkg/product/mke/config/cluster_spec.go | 4 + pkg/product/mke/config/cluster_test.go | 66 +++++++++++++-- 6 files changed, 172 insertions(+), 13 deletions(-) create mode 100644 PRODENG-3393-plan.md diff --git a/PRODENG-3393-plan.md b/PRODENG-3393-plan.md new file mode 100644 index 00000000..efc3bbc3 --- /dev/null +++ b/PRODENG-3393-plan.md @@ -0,0 +1,87 @@ +# PRODENG-3393 — launchpad cannot deploy MKE 3.8 using template from the doc + +**Priority:** Unassigned | **Reporter:** Dzmitry Stremkouski | **Customer:** Nordea PreSales + +## Problem + +Users following the MKE 3.8 doc template cannot deploy because: + +1. The doc template omits the `mcr` block entirely, so launchpad uses defaults. +2. In v1.5.15 (when filed), the default MCR version was old (20.10.13) and the + `get.mirantis.com` install script failed with exit status 1 on Ubuntu 22.04. +3. In v1.5.16, `mcr.version` was removed from `MCRConfig`; the install path + switched to direct apt package management (`pkg/configurer/ubuntu/ubuntu.go`). + The failure mode on Ubuntu 22.04 may differ with the new install path. + +Related: PRODENG-3342 (same reporter, same customer, same root MCR install failure). + +## Root Cause Analysis + +### v1.5.15 (filed against) +- `MCRConfig.Version` defaulted to an old pinned value (20.10.13). +- Install used `https://get.mirantis.com/` shell script; exited 1 on Ubuntu 22.04. +- Reporter in PRODENG-3342 noted: "Please disable dpkg script during docker install" + suggesting a dpkg post-install script conflict (likely Docker default bridge + network activation conflicting with the environment). + +### v1.5.16+ (current) +- `mcr.version` field removed; install is now apt-based (`ubuntu.go:InstallMCR`). +- No `DEBIAN_FRONTEND=noninteractive` is set during apt operations. +- `InstallPackage` installs latest from the configured channel with no version pin. +- The dpkg post-install script issue may still be present (Docker daemon starts on + install and can fail or conflict in restricted environments). + +## Acceptance Criteria + +1. `launchpad apply` with a minimal doc-template config (no `mcr` block) on Ubuntu 22.04 + installs MCR successfully. +2. If a dpkg/apt post-install script failure is confirmed, launchpad either suppresses + the daemon start during install or handles the non-zero exit gracefully. +3. No regression on other Linux distros (EL, SLES). + +## Implementation Plan + +### Step 1 — Reproduce and confirm current failure mode +- Run `launchpad apply` on a fresh Ubuntu 22.04 host with no `mcr` block. +- Capture the full install log (not just the retry-exhausted summary). +- Determine if `docker-ee` post-install script is the failure point. + +### Step 2 — Fix Ubuntu apt install (if dpkg is confirmed) +**File:** `pkg/configurer/ubuntu/ubuntu.go` — `InstallMCR` + +Option A (preferred): Set `DEBIAN_FRONTEND=noninteractive` when calling apt and pass +`-o Dpkg::Options::="--force-confold"` to prevent post-install script interaction. + +Option B: Disable the Docker daemon auto-start during install: +```bash +echo "exit 101" | sudo tee /usr/sbin/policy-rc.d && sudo chmod +x /usr/sbin/policy-rc.d +# ... install packages ... +sudo rm -f /usr/sbin/policy-rc.d +``` +Then explicitly start/enable via `EnableMCR`. + +### Step 3 — Fix or update the `InstallPackage` helper +**File:** `pkg/configurer/linux.go` (or ubuntu-specific override) + +Confirm `InstallPackage` passes `-y` and `DEBIAN_FRONTEND=noninteractive`. +If not, fix it here so all package installs benefit. + +### Step 4 — Doc fix +The doc example for MKE 3.8 must include a valid `mcr` block showing the +correct channel and repoURL. File against the docs team or update in-repo examples. +Note: `mcr.version` is no longer a valid field as of v1.5.16 — any existing docs +that show `mcr.version` must be updated. + +### Step 5 — Verification +- Unit test: `pkg/configurer/ubuntu/ubuntu_test.go` — mock exec, assert + `DEBIAN_FRONTEND=noninteractive` is present in install commands. +- Integration: deploy on Ubuntu 22.04 with a bare (no `mcr` block) config. + +## Files in Scope + +| File | Change | +|---|---| +| `pkg/configurer/ubuntu/ubuntu.go` | Fix `InstallMCR` — noninteractive apt / policy-rc.d | +| `pkg/configurer/linux.go` | Audit `InstallPackage` for `-y` / noninteractive flags | +| `pkg/configurer/ubuntu/ubuntu_test.go` | Unit tests for install command construction | +| `examples/` or `docs/` | Update example YAML (no `mcr.version`, correct channel) | diff --git a/pkg/configurer/ubuntu/ubuntu.go b/pkg/configurer/ubuntu/ubuntu.go index a3470109..6cd212af 100644 --- a/pkg/configurer/ubuntu/ubuntu.go +++ b/pkg/configurer/ubuntu/ubuntu.go @@ -65,10 +65,18 @@ Signed-by: /usr/share/keyrings/mirantis-archive-keyring.gpg if err := c.WriteFile(h, debRepoFilePath, debRepo, "0600"); err != nil { return fmt.Errorf("could not write APT repo file for MCR") } - if err := h.Exec("apt-get update", exec.Sudo(h)); err != nil { + if err := h.Exec("DEBIAN_FRONTEND=noninteractive apt-get update", exec.Sudo(h)); err != nil { return fmt.Errorf("could not update apt package info") } + // NOTE: policy-rc.d — the docker-ee dpkg post-install script calls + // `systemctl start docker` immediately on package install. On hosts where + // the daemon fails to start (network conflicts, restricted systemd + // environments), the error is indistinguishable from a genuine + // package-not-found failure. A robust fix would install a + // /usr/sbin/policy-rc.d that exits 101 (deny) before these calls and + // remove it afterward, delegating daemon startup solely to EnableMCR + // below. Not implemented here pending a confirmed reproduction. if err := c.InstallPackage(h, "containerd.io"); err != nil { return fmt.Errorf("package manager could not install containerd.io") } diff --git a/pkg/product/common/config/mcr_config.go b/pkg/product/common/config/mcr_config.go index 85d3e2c7..f1bf7bff 100644 --- a/pkg/product/common/config/mcr_config.go +++ b/pkg/product/common/config/mcr_config.go @@ -58,9 +58,8 @@ func (c *MCRConfig) UnmarshalYAML(unmarshal func(any) error) error { // SetDefaults sets defaults on the object. func (c *MCRConfig) SetDefaults() { // Constants can't be used in tags, so yaml defaults can't be used here. - if c.Channel == "" { - c.Channel = constant.MCRChannel - } + // Note: Channel intentionally has no default — it is required and must be + // set explicitly. See ClusterSpec.UnmarshalYAML for the validation. if c.RepoURL == "" { c.RepoURL = constant.MCRRepoURL diff --git a/pkg/product/common/config/mcr_config_test.go b/pkg/product/common/config/mcr_config_test.go index f65019d3..977aa3b4 100644 --- a/pkg/product/common/config/mcr_config_test.go +++ b/pkg/product/common/config/mcr_config_test.go @@ -10,9 +10,18 @@ import ( commonconfig "github.com/Mirantis/launchpad/pkg/product/common/config" ) +func TestMCRConfig_ChannelRequired(t *testing.T) { + // Channel must not be defaulted when absent — ClusterConfig.Validate() + // catches an empty channel via the validate:"required" struct tag. + cfg := commonconfig.MCRConfig{} + err := yaml.Unmarshal([]byte("repoURL: https://example.com"), &cfg) + require.NoError(t, err) + require.Empty(t, cfg.Channel, "channel must not be defaulted when absent") +} + func TestSwarmInstallFlags(t *testing.T) { cfg := commonconfig.MCRConfig{} - err := yaml.Unmarshal([]byte("swarmInstallFlags:\n - --foo=foofoo\n - --bar barbar\n - --foobar"), &cfg) + err := yaml.Unmarshal([]byte("channel: stable\nswarmInstallFlags:\n - --foo=foofoo\n - --bar barbar\n - --foobar"), &cfg) require.NoError(t, err) require.Equal(t, "--foobar", cfg.SwarmInstallFlags[2]) require.Equal(t, 0, cfg.SwarmInstallFlags.Index("--foo")) @@ -23,7 +32,7 @@ func TestSwarmInstallFlags(t *testing.T) { func TestSwarmUpdateCommands(t *testing.T) { cfg := commonconfig.MCRConfig{} - err := yaml.Unmarshal([]byte("swarmUpdateCommands:\n - command1\n - command2\n - command3"), &cfg) + err := yaml.Unmarshal([]byte("channel: stable\nswarmUpdateCommands:\n - command1\n - command2\n - command3"), &cfg) require.NoError(t, err) require.Equal(t, "command3", cfg.SwarmUpdateCommands[2]) require.Equal(t, 0, slices.Index(cfg.SwarmUpdateCommands, "command1")) diff --git a/pkg/product/mke/config/cluster_spec.go b/pkg/product/mke/config/cluster_spec.go index 990575e1..578d8d2f 100644 --- a/pkg/product/mke/config/cluster_spec.go +++ b/pkg/product/mke/config/cluster_spec.go @@ -176,6 +176,10 @@ func (c *ClusterSpec) UnmarshalYAML(unmarshal func(interface{}) error) error { if err := unmarshal(specAlias); err != nil { return err } + if c.MCR.Channel == "" { + return fmt.Errorf("%w: missing spec.mcr.channel — the mcr block is required; set a channel (e.g. channel: stable-29.4)", errInvalidConfig) + } + if c.Hosts.Count(func(h *Host) bool { return h.Role == "msr" }) > 0 { if specAlias.MSR == nil { diff --git a/pkg/product/mke/config/cluster_test.go b/pkg/product/mke/config/cluster_test.go index c96ef5c7..fccaaa41 100644 --- a/pkg/product/mke/config/cluster_test.go +++ b/pkg/product/mke/config/cluster_test.go @@ -39,14 +39,16 @@ func TestHostRequireManagerValidationPass(t *testing.T) { apiVersion: "launchpad.mirantis.com/mke/v1.6" kind: mke spec: + mcr: + channel: stable hosts: - ssh: address: 10.0.0.1 - keyPath: ` + kf.Name() + ` + keyPath: ` + kf.Name() + ` role: manager - ssh: address: 10.0.0.2 - keyPath: ` + kf.Name() + ` + keyPath: ` + kf.Name() + ` role: worker mke: version: 3.3.7 @@ -63,14 +65,16 @@ func TestHostRequireManagerValidationFail(t *testing.T) { apiVersion: "launchpad.mirantis.com/mke/v1.4" kind: mke spec: + mcr: + channel: stable hosts: - ssh: address: 10.0.0.1 - keyPath: ` + kf.Name() + ` + keyPath: ` + kf.Name() + ` role: worker - ssh: address: 10.0.0.2 - keyPath: ` + kf.Name() + ` + keyPath: ` + kf.Name() + ` role: worker mke: version: 3.3.7 @@ -87,6 +91,8 @@ func TestNonExistingHostsFails(t *testing.T) { apiVersion: "launchpad.mirantis.com/mke/v1.4" kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -98,11 +104,31 @@ spec: validateErrorField(t, err, "Hosts") } +func TestMissingMCRChannelFails(t *testing.T) { + data := ` +apiVersion: launchpad.mirantis.com/mke/v1.6 +kind: mke +spec: + mke: + version: 3.3.7 + hosts: + - ssh: + address: "1.2.3.4" + role: manager +` + c := &ClusterConfig{} + err := yaml.Unmarshal([]byte(strings.ReplaceAll(data, "\t", " ")), c) + require.Error(t, err) + require.ErrorContains(t, err, "spec.mcr.channel") +} + func TestHostAddressValidationWithInvalidIP(t *testing.T) { data := ` apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -122,6 +148,8 @@ func TestHostAddressValidationWithValidIP(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -140,6 +168,8 @@ func TestHostAddressValidationWithInvalidHostname(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -159,6 +189,8 @@ func TestHostAddressValidationWithValidHostname(t *testing.T) { apiVersion: launchpad.mirantis.com/v1 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -177,6 +209,8 @@ func TestHostSshPortValidation(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -196,6 +230,8 @@ func TestHostRoleValidation(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -215,6 +251,8 @@ func TestHostWithComplexMCRConfig(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -312,6 +350,8 @@ func TestHostWinRMCACertPathValidation(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -332,6 +372,8 @@ func TestHostWinRMCertPathValidation(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -352,6 +394,8 @@ func TestHostWinRMKeyPathValidation(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -371,6 +415,8 @@ func TestHostSSHDefaults(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -389,6 +435,8 @@ func TestHostWinRMDefaults(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.6 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -416,6 +464,8 @@ func TestValidationWithMSRRole(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 msr: @@ -423,7 +473,7 @@ spec: hosts: - ssh: address: "10.0.0.1" - keyPath: ` + kf.Name() + ` + keyPath: ` + kf.Name() + ` role: weirdrole - ssh: address: "10.0.0.2" @@ -438,6 +488,8 @@ spec: apiVersion: launchpad.mirantis.com/mke/v1.6 kind: mke+msr spec: + mcr: + channel: stable mke: version: 3.3.7 msr: @@ -445,11 +497,11 @@ spec: hosts: - ssh: address: "10.0.0.1" - keyPath: ` + kf.Name() + ` + keyPath: ` + kf.Name() + ` role: msr - ssh: address: "10.0.0.2" - keyPath: ` + kf.Name() + ` + keyPath: ` + kf.Name() + ` role: manager ` c := loadYaml(t, data) From db30d60778c5d8e86d58e5f471c65e8e5bb14ad1 Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Mon, 8 Jun 2026 08:53:12 +0300 Subject: [PATCH 2/4] =?UTF-8?q?smoke:=20drop=20rhel8=20from=20legacy=20tes?= =?UTF-8?q?t=20=E2=80=94=20AMI=20unavailable=20in=20us-east-1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: James Nesbitt --- test/smoke/smoke_test.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/smoke/smoke_test.go b/test/smoke/smoke_test.go index b396432c..8dfdeb3e 100644 --- a/test/smoke/smoke_test.go +++ b/test/smoke/smoke_test.go @@ -187,7 +187,7 @@ func TestModernCluster(t *testing.T) { }) } -// TestLegacyCluster exercises rhel8/rocky8/ubuntu22 managers and workers +// TestLegacyCluster exercises rocky8/ubuntu22 managers and workers // with MCR stable-25.0 and MKE 3.8.8. func TestLegacyCluster(t *testing.T) { runSmokeTest(t, smokeConfig{ @@ -197,10 +197,8 @@ func TestLegacyCluster(t *testing.T) { MSRVersion: "2.9.28", SSHKeyAlgorithm: "ed25519", Nodegroups: map[string]interface{}{ - "MngrRhel8": test.Platforms["Rhel8"].GetManager(), "MngrRocky8": test.Platforms["Rocky8"].GetManager(), "MngrUbuntu22": test.Platforms["Ubuntu22"].GetManager(), - "WrkRhel8": test.Platforms["Rhel8"].GetWorker(), "WrkRocky8": test.Platforms["Rocky8"].GetWorker(), "WrkUbuntu22": test.Platforms["Ubuntu22"].GetWorker(), }, From 4cc13034e24ef0da7288a2e3d83a08367502dd3a Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Mon, 8 Jun 2026 08:53:41 +0300 Subject: [PATCH 3/4] smoke: note rhel8 removal in TestLegacyCluster comment Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: James Nesbitt --- test/smoke/smoke_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/smoke/smoke_test.go b/test/smoke/smoke_test.go index 8dfdeb3e..973282eb 100644 --- a/test/smoke/smoke_test.go +++ b/test/smoke/smoke_test.go @@ -188,7 +188,7 @@ func TestModernCluster(t *testing.T) { } // TestLegacyCluster exercises rocky8/ubuntu22 managers and workers -// with MCR stable-25.0 and MKE 3.8.8. +// with MCR stable-25.0 and MKE 3.8.8. rhel8 was dropped: AMI unavailable in us-east-1. func TestLegacyCluster(t *testing.T) { runSmokeTest(t, smokeConfig{ Name: "legacy", From 9f3cd4754ffb863de8db2baafb61a52c0389447b Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Mon, 8 Jun 2026 11:00:45 +0300 Subject: [PATCH 4/4] smoke: replace rocky8 with rhel8 in legacy test rocky8 AMI (owner 792107900819, Rocky-8-EC2-Base-8.*.x86_64) is no longer available in us-east-1. rhel8 resolves fine and has MCR packages on repos.mirantis.com. AlmaLinux was considered but has no MCR packages. Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: James Nesbitt --- test/smoke/smoke_test.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/smoke/smoke_test.go b/test/smoke/smoke_test.go index 973282eb..be55f4dc 100644 --- a/test/smoke/smoke_test.go +++ b/test/smoke/smoke_test.go @@ -187,8 +187,9 @@ func TestModernCluster(t *testing.T) { }) } -// TestLegacyCluster exercises rocky8/ubuntu22 managers and workers -// with MCR stable-25.0 and MKE 3.8.8. rhel8 was dropped: AMI unavailable in us-east-1. +// TestLegacyCluster exercises rhel8/ubuntu22 managers and workers +// with MCR stable-25.0 and MKE 3.8.8. rocky8 was dropped: AMI unavailable in us-east-1 +// (owner 792107900819 no longer publishes Rocky-8-EC2-Base-8.*.x86_64 there). func TestLegacyCluster(t *testing.T) { runSmokeTest(t, smokeConfig{ Name: "legacy", @@ -197,9 +198,9 @@ func TestLegacyCluster(t *testing.T) { MSRVersion: "2.9.28", SSHKeyAlgorithm: "ed25519", Nodegroups: map[string]interface{}{ - "MngrRocky8": test.Platforms["Rocky8"].GetManager(), + "MngrRhel8": test.Platforms["Rhel8"].GetManager(), "MngrUbuntu22": test.Platforms["Ubuntu22"].GetManager(), - "WrkRocky8": test.Platforms["Rocky8"].GetWorker(), + "WrkRhel8": test.Platforms["Rhel8"].GetWorker(), "WrkUbuntu22": test.Platforms["Ubuntu22"].GetWorker(), }, })