From 0377b56224109773df8547868c6d7977e56fde94 Mon Sep 17 00:00:00 2001 From: Bohdan Dobrelia Date: Mon, 15 Jun 2026 18:06:00 +0200 Subject: [PATCH] bm_sno: Add cifmw_bm_agent_reuse_vmedia skip mode When the iDRAC cannot fetch the agent ISO over HTTP (network routing, firewall), the only viable path is to upload the ISO through the iDRAC web UI as a local file. The role previously had no way to skip ISO generation and VirtualMedia insertion on a second run. Introduce cifmw_bm_agent_reuse_vmedia (default: false). When true, the role skips: stale-state cleanup, ISO generation, live-debug ISO patching, podman HTTP server start/stop, VirtualMedia eject before insert, ISO insertion, and VirtualMedia eject after install. All remaining steps run unchanged: USB boot check, power-off, SSH key and config generation, openshift-install acquisition, LVMS MachineConfig generation, UEFI boot-override discovery, power-on, and install wait. The intended two-run workflow (which works best for wrappers using the bm_sno role rather than for CI jobs): - Run 1 (reuse_vmedia=false) generates the ISO and auth artifacts; - the operator uploads the ISO via the iDRAC UI and confirms it is Connected; - run 2 (reuse_vmedia=true) powers the host off, sets the one-time UEFI boot override, powers it back on, and waits for install-complete. Assisted-By: claude-4.6-sonnet-medium Signed-off-by: Bohdan Dobrelia --- roles/bm_sno/README.md | 76 ++++++++++++++++++++++++++++++++++ roles/bm_sno/defaults/main.yml | 8 ++++ roles/bm_sno/tasks/main.yml | 18 +++++++- 3 files changed, 100 insertions(+), 2 deletions(-) diff --git a/roles/bm_sno/README.md b/roles/bm_sno/README.md index 1b2795745..2f87aaec2 100644 --- a/roles/bm_sno/README.md +++ b/roles/bm_sno/README.md @@ -63,6 +63,7 @@ provision IP via `/etc/hosts` entries managed by the role.
| `cifmw_bm_agent_live_debug` | bool | `false` | Patch the agent ISO with password, autologin, and systemd debug shell on `tty6` for discovery-phase console access (requires `cifmw_bm_agent_core_password`) | | `cifmw_bm_agent_disabled_ifaces` | list | `[]` | Extra NIC names to disable IPv4/IPv6 on during agent-based install. Prevents overlapping-subnet validation failures when multiple NICs share a native VLAN (e.g. `[eno2]`). The interfaces stay link-up but get no IP address; post-install NNCP configures them. | | `cifmw_bm_agent_lvms_partition` | dict | `{}` | When set, creates an Ignition partition at install time to cap CoreOS rootfs growth and leave unallocated space for the LVMS StorageClass. Keys: `device` (required, e.g. `/dev/nvme0n1`), `rootfs_mib` (default `150000`), `size_mib` (default `0` = rest of disk), `label` (default `lvmstorage`). See [LVMS partition](#lvms-partition). | +| `cifmw_bm_agent_reuse_vmedia` | bool | `false` | Skip ISO generation, HTTP server start/stop, and VirtualMedia eject/insert when the agent ISO is already mounted in the iDRAC (e.g. via the iDRAC web UI using a local file). When `true` the role goes straight to setting the one-time boot override and waiting for install. The `openshift-install` binary and working directory from the previous run must still be present on disk. 
| ## Secrets management @@ -100,6 +101,81 @@ The agent-based deployment is composed of reusable task files under | `bm_patch_agent_iso.yml` | Patches the agent ISO ignition with core password, autologin, and debug shell on tty6 (used when `cifmw_bm_agent_live_debug` is true) | | `bm_core_password_machineconfig.yml` | Generates a MachineConfig manifest to set the core user password hash post-install | +## Pre-mounted ISO (reuse VirtualMedia mode) + +Use this when the agent ISO cannot be served over HTTP from the Ansible +controller to the iDRAC (for example: the iDRAC is on a network segment +that cannot reach the controller, or VirtualMedia HTTP insertion fails +persistently). In this case mount the ISO manually in the iDRAC web UI via +*Virtual Media → Connect Virtual Media → Local File*, then set +`cifmw_bm_agent_reuse_vmedia: true` in your `vars.yaml` (or pass it as an +extra-var) and re-run the playbook. + +### Two-run workflow + +**Run 1 — generate the agent ISO** (`cifmw_bm_agent_reuse_vmedia: false`, +the default). Let the playbook run until the ISO is written to disk — you +do not need the VirtualMedia insert to succeed. Abort after the ISO +generation step if needed: + +```yaml +# vars.yaml +cifmw_bm_agent_reuse_vmedia: false # default — explicit for clarity +``` + +After Run 1, the following artifacts exist in +`/artifacts/agent-install/`: + +- `openshift-install` — binary used for `wait-for` in Run 2 +- `agent.x86_64.iso` — copy this to your local machine and upload via + the iDRAC web UI (`Virtual Media → Connect Virtual Media → Local File`) +- `agent_ssh_key` — cluster SSH key used by the installer + +Confirm the iDRAC shows the drive as *Connected* before proceeding.
+ +**Run 2 — boot from the pre-mounted ISO**: + +```yaml +# vars.yaml (or -e on the ansible-playbook command line) +cifmw_bm_agent_reuse_vmedia: true +``` + +```bash +ansible-playbook -i inventory.yaml playbook.yaml \ + -e cifmw_bm_agent_reuse_vmedia=true +``` + +This run skips ISO generation, the podman HTTP server, and all VirtualMedia +eject/insert steps. It powers the host off, sets the UEFI one-time boot +override to the Virtual Optical Drive, powers the host back on, and waits +for `openshift-install agent wait-for install-complete`. + +### What is skipped with `cifmw_bm_agent_reuse_vmedia: true` + +- Removing stale agent state from the previous run +- ISO generation (`openshift-install agent create image`) +- ISO patching for live debug +- HTTP server start and stop (podman) +- VirtualMedia eject before insert +- VirtualMedia ISO insert +- VirtualMedia eject after install + +### What still runs + +- USB boot BIOS check / enable +- Power-off (so the host boots cleanly from the mounted ISO) +- SSH key generation (idempotent, reuses existing key) +- `openshift-install` binary acquisition (skipped when binary already present) +- Config template generation (idempotent) +- LVMS MachineConfig generation (idempotent) +- UEFI VirtualMedia target discovery and one-time boot override +- Power-on and install wait +- kubeconfig copy + +**Prerequisite**: the `openshift-install` binary and the working directory +(`/artifacts/agent-install/`) from Run 1 must +still be present on disk. 
+ ## openshift-install acquisition The `openshift-install` binary is obtained automatically via one of two diff --git a/roles/bm_sno/defaults/main.yml b/roles/bm_sno/defaults/main.yml index d0fd39cf4..3e1c38a11 100644 --- a/roles/bm_sno/defaults/main.yml +++ b/roles/bm_sno/defaults/main.yml @@ -18,3 +18,11 @@ cifmw_bm_agent_disabled_ifaces: [] # size_mib: 0 # 0 = rest of disk # label: lvmstorage cifmw_bm_agent_lvms_partition: {} + +# Skip ISO generation, HTTP server, and VirtualMedia eject/insert when the +# agent ISO is already mounted in the iDRAC (e.g. via the iDRAC web UI using +# a local file). The playbook will go straight to setting the one-time boot +# override and waiting for the install to complete. +# The openshift-install binary and work directory from the previous run must +# still be present (ISO generation is skipped in this mode). +cifmw_bm_agent_reuse_vmedia: false diff --git a/roles/bm_sno/tasks/main.yml b/roles/bm_sno/tasks/main.yml index 8c472eb92..c18327739 100644 --- a/roles/bm_sno/tasks/main.yml +++ b/roles/bm_sno/tasks/main.yml @@ -190,6 +190,7 @@ mode: "0644" - name: Remove stale agent state from previous runs + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.file: path: "{{ item }}" state: absent @@ -255,25 +256,30 @@ when: cifmw_bm_agent_disabled_ifaces | default([]) | length > 0 - name: Generate agent ISO + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.command: cmd: "{{ _work_dir }}/openshift-install agent create image --dir {{ _work_dir }}" - name: Patch agent ISO ignition for discovery-phase console access - when: cifmw_bm_agent_live_debug | bool + when: + - not cifmw_bm_agent_reuse_vmedia | bool + - cifmw_bm_agent_live_debug | bool ansible.builtin.include_tasks: bm_patch_agent_iso.yml - name: Set controller IP fact ansible.builtin.set_fact: _controller_ip: >- {{ hostvars[inventory_hostname]['nodepool']['interface_ip'] | - default(ansible_default_ipv4.address | + default(ansible_default_ipv4.address |
default(ansible_host)) }} - name: Show ISO URL that iDRAC will fetch + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.debug: msg: "ISO URL for iDRAC: http://{{ _controller_ip }}:{{ _iso_http_port }}/agent.x86_64.iso" - name: Stop any existing agent-iso-server container + when: not cifmw_bm_agent_reuse_vmedia | bool become: true ansible.builtin.command: cmd: podman rm -f agent-iso-server @@ -281,6 +287,7 @@ changed_when: false - name: Serve agent ISO via podman httpd + when: not cifmw_bm_agent_reuse_vmedia | bool become: true ansible.builtin.command: cmd: >- @@ -291,6 +298,7 @@ register: _httpd_start - name: Check agent-iso-server container is running + when: not cifmw_bm_agent_reuse_vmedia | bool become: true ansible.builtin.command: cmd: podman ps --filter name=agent-iso-server --format '{{ '{{' }}.Status{{ '}}' }}' @@ -298,10 +306,12 @@ changed_when: false - name: Show container status + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.debug: msg: "agent-iso-server status: {{ _httpd_status.stdout }}" - name: Wait for HTTP server to respond + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.uri: url: "http://{{ _controller_ip }}:{{ _iso_http_port }}/agent.x86_64.iso" method: HEAD @@ -311,9 +321,11 @@ until: _http_check.status == 200 - name: Eject any existing VirtualMedia before insert + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.include_tasks: bm_eject_vmedia.yml - name: Insert agent ISO via VirtualMedia + when: not cifmw_bm_agent_reuse_vmedia | bool no_log: true ansible.builtin.uri: url: "https://{{ _bmc_host }}/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia/CD/Actions/VirtualMedia.InsertMedia" @@ -403,9 +415,11 @@ mode: "0600" - name: Eject VirtualMedia after install + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.include_tasks: bm_eject_vmedia.yml - name: Stop HTTP ISO server + when: not cifmw_bm_agent_reuse_vmedia | bool become: true ansible.builtin.command: cmd: podman rm -f 
agent-iso-server