From 00d762928773e8117bb30a957ef4c4ed672b4b8f Mon Sep 17 00:00:00 2001 From: Bohdan Dobrelia Date: Mon, 15 Jun 2026 12:16:08 +0200 Subject: [PATCH 1/5] [bm_sno] Discover VirtualMedia member at runtime The InsertMedia and EjectMedia action URLs were hardcoded as VirtualMedia/CD, which is only valid on older iDRAC firmware. Newer firmware versions expose the virtual optical drive under a different member name (e.g. RemovableDisk, 1, 2), causing a 404 ResourceNotFound error on the InsertMedia call. Add bm_discover_vmedia_member.yml that GETs the VirtualMedia collection, selects the first member whose MediaTypes includes CD or DVD (fallback: first member with an InsertMedia action), and exposes _vmedia_member_uri, _vmedia_insert_action, and _vmedia_eject_action for use by other tasks. Update main.yml to run discovery before the eject+insert sequence and use the discovered action URLs. Update bm_eject_vmedia.yml to trigger lazy discovery when the action variable is not already set. Delete stale iDRAC sessions as well. Fix bm_discover_vmedia_target.yml to verify the inserted ISO against the discovered member URI rather than the hardcoded VirtualMedia/CD path. Add cifmw_bm_agent_iso_server_ip to override the IP the iDRAC uses to fetch the agent ISO. Auto-detection picks the default-route address, which is unreachable by iDRAC when the controller runs over VPN. 
Generated-By: cursor-agent (claude-4.6-sonnet-medium) Signed-off-by: Bohdan Dobrelia --- roles/bm_sno/README.md | 1 + roles/bm_sno/defaults/main.yml | 1 + .../tasks/bm_discover_vmedia_member.yml | 112 ++++++++++++++++++ .../tasks/bm_discover_vmedia_target.yml | 2 +- roles/bm_sno/tasks/bm_eject_vmedia.yml | 41 ++++++- roles/bm_sno/tasks/main.yml | 13 +- 6 files changed, 164 insertions(+), 6 deletions(-) create mode 100644 roles/bm_sno/tasks/bm_discover_vmedia_member.yml diff --git a/roles/bm_sno/README.md b/roles/bm_sno/README.md index 1b2795745..c7e6e6112 100644 --- a/roles/bm_sno/README.md +++ b/roles/bm_sno/README.md @@ -63,6 +63,7 @@ provision IP via `/etc/hosts` entries managed by the role. | `cifmw_bm_agent_live_debug` | bool | `false` | Patch the agent ISO with password, autologin, and systemd debug shell on `tty6` for discovery-phase console access (requires `cifmw_bm_agent_core_password`) | | `cifmw_bm_agent_disabled_ifaces` | list | `[]` | Extra NIC names to disable IPv4/IPv6 on during agent-based install. Prevents overlapping-subnet validation failures when multiple NICs share a native VLAN (e.g. `[eno2]`). The interfaces stay link-up but get no IP address; post-install NNCP configures them. | | `cifmw_bm_agent_lvms_partition` | dict | `{}` | When set, creates an Ignition partition at install time to cap CoreOS rootfs growth and leave unallocated space for the LVMS StorageClass. Keys: `device` (required, e.g. `/dev/nvme0n1`), `rootfs_mib` (default `150000`), `size_mib` (default `0` = rest of disk), `label` (default `lvmstorage`). See [LVMS partition](#lvms-partition). | +| `cifmw_bm_agent_iso_server_ip` | str | `""` | IP address the iDRAC uses to fetch the agent ISO. When empty, the role auto-detects the controller's IP from nodepool metadata or `ansible_default_ipv4.address`. Set this when the auto-detected IP is not reachable by the iDRAC — for example, when running over VPN where the VPN interface IP must be used instead of the default-route IP. 
| ## Secrets management diff --git a/roles/bm_sno/defaults/main.yml b/roles/bm_sno/defaults/main.yml index d0fd39cf4..271d58619 100644 --- a/roles/bm_sno/defaults/main.yml +++ b/roles/bm_sno/defaults/main.yml @@ -1,5 +1,6 @@ --- cifmw_bm_agent_iso_http_port: 80 +cifmw_bm_agent_iso_server_ip: "" cifmw_bm_agent_installer_timeout: 7200 cifmw_bm_agent_openshift_version: "4.18.3" cifmw_bm_agent_core_password: "redhat" diff --git a/roles/bm_sno/tasks/bm_discover_vmedia_member.yml b/roles/bm_sno/tasks/bm_discover_vmedia_member.yml new file mode 100644 index 000000000..67fe42661 --- /dev/null +++ b/roles/bm_sno/tasks/bm_discover_vmedia_member.yml @@ -0,0 +1,112 @@ +--- +# Discover the correct VirtualMedia member URI for CD/DVD on this iDRAC. +# The member name varies across firmware versions (CD, RemovableDisk, 1, 2, …). +# On iDRAC 10+ the Managers VirtualMedia path requires NumericDynamicSegmentsEnable; +# the Systems path works without it and is tried as a fallback. +# You can manually set it with racadm set Redfish.1.NumericDynamicSegmentsEnable Enabled. +# Sets _vmedia_member_uri, _vmedia_insert_action, _vmedia_eject_action. 
+# Requires: _bmc_host, _bmc_creds, _redfish_headers + +- name: Fetch VirtualMedia collection (Managers path) + no_log: false + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia" + method: GET + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + return_content: true + status_code: [200, 404] + register: _vmedia_collection_mgr + +- name: Fetch VirtualMedia collection (Systems path fallback) + when: _vmedia_collection_mgr.status == 404 + no_log: false + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1/VirtualMedia" + method: GET + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + return_content: true + status_code: [200] + register: _vmedia_collection_sys + +- name: Set active VirtualMedia collection result + ansible.builtin.set_fact: + _vmedia_collection: >- + {{ _vmedia_collection_sys + if (_vmedia_collection_mgr.status == 404) + else _vmedia_collection_mgr }} + +- name: Show VirtualMedia collection source + ansible.builtin.debug: + msg: >- + VirtualMedia collection from + {{ '/redfish/v1/Systems/System.Embedded.1/VirtualMedia' + if (_vmedia_collection_mgr.status == 404) + else '/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia' }} + ({{ _vmedia_collection.json.Members | length }} members) + +- name: Fetch each VirtualMedia member detail + no_log: false + ansible.builtin.uri: + url: "https://{{ _bmc_host }}{{ item['@odata.id'] }}" + method: GET + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + return_content: true + status_code: [200] + register: _vmedia_members + loop: "{{ _vmedia_collection.json.Members }}" + loop_control: + label: "{{ 
item['@odata.id'] | basename }}" + +- name: Pick the first member that supports CD or DVD media types + ansible.builtin.set_fact: + _vmedia_member_uri: "{{ item.json['@odata.id'] }}" + _vmedia_insert_action: >- + {{ item.json.Actions['#VirtualMedia.InsertMedia'].target }} + _vmedia_eject_action: >- + {{ item.json.Actions['#VirtualMedia.EjectMedia'].target }} + when: + - _vmedia_member_uri is not defined + - item.json.MediaTypes is defined + - item.json.MediaTypes | intersect(['CD', 'DVD']) | length > 0 + loop: "{{ _vmedia_members.results }}" + loop_control: + label: "{{ item.json['@odata.id'] | basename }}" + +- name: Fall back to first member with an InsertMedia action + ansible.builtin.set_fact: + _vmedia_member_uri: "{{ item.json['@odata.id'] }}" + _vmedia_insert_action: >- + {{ item.json.Actions['#VirtualMedia.InsertMedia'].target }} + _vmedia_eject_action: >- + {{ item.json.Actions['#VirtualMedia.EjectMedia'].target }} + when: + - _vmedia_member_uri is not defined + - item.json.Actions is defined + - "'#VirtualMedia.InsertMedia' in item.json.Actions" + loop: "{{ _vmedia_members.results }}" + loop_control: + label: "{{ item.json['@odata.id'] | basename }}" + +- name: Fail if no usable VirtualMedia member found + when: _vmedia_member_uri is not defined + ansible.builtin.fail: + msg: >- + No VirtualMedia member with InsertMedia support found. 
+ Members: {{ _vmedia_members.results | + map(attribute='json') | map(attribute='@odata.id') | list | join(', ') }} + +- name: Show discovered VirtualMedia member + ansible.builtin.debug: + msg: "VirtualMedia member: {{ _vmedia_member_uri }} — insert: {{ _vmedia_insert_action }}" diff --git a/roles/bm_sno/tasks/bm_discover_vmedia_target.yml b/roles/bm_sno/tasks/bm_discover_vmedia_target.yml index 195fbed23..9d2157368 100644 --- a/roles/bm_sno/tasks/bm_discover_vmedia_target.yml +++ b/roles/bm_sno/tasks/bm_discover_vmedia_target.yml @@ -175,7 +175,7 @@ - name: Verify VirtualMedia is still inserted no_log: true ansible.builtin.uri: - url: "https://{{ _bmc_host }}/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia/CD" + url: "https://{{ _bmc_host }}{{ _vmedia_member_uri }}" method: GET headers: "{{ _redfish_headers }}" user: "{{ _bmc_creds.username }}" diff --git a/roles/bm_sno/tasks/bm_eject_vmedia.yml b/roles/bm_sno/tasks/bm_eject_vmedia.yml index 4e66b3c54..21c9917f1 100644 --- a/roles/bm_sno/tasks/bm_eject_vmedia.yml +++ b/roles/bm_sno/tasks/bm_eject_vmedia.yml @@ -5,10 +5,49 @@ # Neither Redfish PATCH (405), vmdisconnect (vConsole-only), nor # remoteimage -d reliably release it. A racreset is the only way to # guarantee the stale RFS is fully torn down. +# On iDRAC 10, stale Redfish sessions holding a VirtualMedia device cause +# VRM0021 ("already in use") on the next InsertMedia call. Delete all +# sessions created by prior runs before ejecting. 
# Requires: _bmc_host, _bmc_creds, _redfish_headers + +- name: Fetch active iDRAC sessions + no_log: true + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/SessionService/Sessions" + method: GET + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200, 404] + register: _idrac_sessions + failed_when: false + +- name: Delete stale iDRAC sessions + no_log: true + ansible.builtin.uri: + url: "https://{{ _bmc_host }}{{ item['@odata.id'] }}" + method: DELETE + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200, 204, 404] + loop: "{{ _idrac_sessions.json.Members | default([]) }}" + loop_control: + label: "{{ item['@odata.id'] | basename }}" + failed_when: false + when: _idrac_sessions.status == 200 + +- name: Discover VirtualMedia member URI (if not already known) + when: _vmedia_eject_action is not defined + ansible.builtin.include_tasks: bm_discover_vmedia_member.yml + - name: Eject VirtualMedia ansible.builtin.uri: - url: "https://{{ _bmc_host }}/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia/CD/Actions/VirtualMedia.EjectMedia" + url: "https://{{ _bmc_host }}{{ _vmedia_eject_action }}" method: POST headers: "{{ _redfish_headers }}" body_format: json diff --git a/roles/bm_sno/tasks/main.yml b/roles/bm_sno/tasks/main.yml index 8c472eb92..a6c839ea4 100644 --- a/roles/bm_sno/tasks/main.yml +++ b/roles/bm_sno/tasks/main.yml @@ -265,9 +265,11 @@ - name: Set controller IP fact ansible.builtin.set_fact: _controller_ip: >- - {{ hostvars[inventory_hostname]['nodepool']['interface_ip'] | - default(ansible_default_ipv4.address | - default(ansible_host)) }} + {{ cifmw_bm_agent_iso_server_ip + if cifmw_bm_agent_iso_server_ip | length > 0 + else (hostvars[inventory_hostname]['nodepool']['interface_ip'] | + 
default(ansible_default_ipv4.address | + default(ansible_host))) }} - name: Show ISO URL that iDRAC will fetch ansible.builtin.debug: @@ -310,13 +312,16 @@ delay: 3 until: _http_check.status == 200 +- name: Discover VirtualMedia member URI + ansible.builtin.include_tasks: bm_discover_vmedia_member.yml + - name: Eject any existing VirtualMedia before insert ansible.builtin.include_tasks: bm_eject_vmedia.yml - name: Insert agent ISO via VirtualMedia no_log: true ansible.builtin.uri: - url: "https://{{ _bmc_host }}/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia/CD/Actions/VirtualMedia.InsertMedia" + url: "https://{{ _bmc_host }}{{ _vmedia_insert_action }}" method: POST headers: "{{ _redfish_headers }}" body_format: json From 8ad2362242ab576f2563a3d0ae38eb6a20291eb0 Mon Sep 17 00:00:00 2001 From: Bohdan Dobrelia Date: Mon, 15 Jun 2026 18:06:00 +0200 Subject: [PATCH 2/5] [bm_sno] add cifmw_bm_agent_reuse_vmedia skip mode When the iDRAC cannot fetch the agent ISO over HTTP (network routing, firewall), the only viable path is to upload the ISO through the iDRAC web UI as a local file. The role previously had no way to skip ISO generation and VirtualMedia insertion on a second run. Introduce cifmw_bm_agent_reuse_vmedia (default: false). When true, the role skips: stale-state cleanup, ISO generation, live-debug ISO patching, podman HTTP server start/stop, VirtualMedia eject before insert, ISO insertion, and VirtualMedia eject after install. All remaining steps run unchanged: USB boot check, power-off, SSH key and config generation, openshift-install acquisition, LVMS MachineConfig generation, UEFI boot-override discovery, power-on, and install wait. 
The intended two-run workflow (which works best for wrappers that use the bm_sno role, rather than for CI jobs): - Run 1 (reuse_vmedia=false) generates the ISO and auth artifacts; - the operator uploads the ISO via the iDRAC UI and confirms it is Connected; - Run 2 (reuse_vmedia=true) powers the host off, sets the one-time UEFI boot override, powers it back on, and waits for install-complete. Assisted-By: claude-4.6-sonnet-medium Signed-off-by: Bohdan Dobrelia --- roles/bm_sno/README.md | 76 +++++++++++++++++++ roles/bm_sno/defaults/main.yml | 8 ++ .../tasks/bm_discover_vmedia_target.yml | 1 + roles/bm_sno/tasks/main.yml | 17 ++++- 4 files changed, 101 insertions(+), 1 deletion(-) diff --git a/roles/bm_sno/README.md b/roles/bm_sno/README.md index c7e6e6112..496007290 100644 --- a/roles/bm_sno/README.md +++ b/roles/bm_sno/README.md @@ -63,6 +63,7 @@ provision IP via `/etc/hosts` entries managed by the role. | `cifmw_bm_agent_live_debug` | bool | `false` | Patch the agent ISO with password, autologin, and systemd debug shell on `tty6` for discovery-phase console access (requires `cifmw_bm_agent_core_password`) | | `cifmw_bm_agent_disabled_ifaces` | list | `[]` | Extra NIC names to disable IPv4/IPv6 on during agent-based install. Prevents overlapping-subnet validation failures when multiple NICs share a native VLAN (e.g. `[eno2]`). The interfaces stay link-up but get no IP address; post-install NNCP configures them. | | `cifmw_bm_agent_lvms_partition` | dict | `{}` | When set, creates an Ignition partition at install time to cap CoreOS rootfs growth and leave unallocated space for the LVMS StorageClass. Keys: `device` (required, e.g. `/dev/nvme0n1`), `rootfs_mib` (default `150000`), `size_mib` (default `0` = rest of disk), `label` (default `lvmstorage`). See [LVMS partition](#lvms-partition). 
| +| `cifmw_bm_agent_reuse_vmedia` | bool | `false` | Skip ISO generation, HTTP server start/stop, and VirtualMedia eject/insert when the agent ISO is already mounted in the iDRAC (e.g. via the iDRAC web UI using a local file). When `true` the role goes straight to setting the one-time boot override and waiting for install. The `openshift-install` binary and working directory from the previous run must still be present on disk. | | `cifmw_bm_agent_iso_server_ip` | str | `""` | IP address the iDRAC uses to fetch the agent ISO. When empty, the role auto-detects the controller's IP from nodepool metadata or `ansible_default_ipv4.address`. Set this when the auto-detected IP is not reachable by the iDRAC — for example, when running over VPN where the VPN interface IP must be used instead of the default-route IP. | ## Secrets management @@ -101,6 +102,81 @@ The agent-based deployment is composed of reusable task files under | `bm_patch_agent_iso.yml` | Patches the agent ISO ignition with core password, autologin, and debug shell on tty6 (used when `cifmw_bm_agent_live_debug` is true) | | `bm_core_password_machineconfig.yml` | Generates a MachineConfig manifest to set the core user password hash post-install | +## Pre-mounted ISO (reuse VirtualMedia mode) + +Use this when the agent ISO cannot be served over HTTP from the Ansible +controller to the iDRAC (for example: the iDRAC is on a network segment +unreachable from the controller, or VirtualMedia HTTP insertion fails +persistently). In this case mount the ISO manually in the iDRAC web UI via +*Virtual Media → Connect Virtual Media → Local File*, then set +`cifmw_bm_agent_reuse_vmedia: true` in your `vars.yaml` (or pass it as an +extra-var) and re-run the playbook. + +### Two-playbook workflow + +**Run 1 — generate the agent ISO** (`cifmw_bm_agent_reuse_vmedia: false`, +the default). Let the playbook run until the ISO is written to disk — you +do not need the VirtualMedia insert to succeed. 
Abort after the ISO +generation step if needed: + +```yaml +# vars.yaml +cifmw_bm_agent_reuse_vmedia: false # default — explicit for clarity +``` + +After Run 1, the following artifacts exist in +`/artifacts/agent-install/`: + +- `openshift-install` — binary used for `wait-for` in Run 2 +- `agent.x86_64.iso` — copy this to your local machine and upload via + the iDRAC web UI (`Virtual Media → Connect Virtual Media → Local File`) +- `agent_ssh_key` — cluster SSH key used by the installer + +Confirm the iDRAC shows the drive as *Connected* before proceeding. + +**Run 2 — boot from the pre-mounted ISO**: + +```yaml +# vars.yaml (or -e on the ansible-playbook command line) +cifmw_bm_agent_reuse_vmedia: true +``` + +```bash +ansible-playbook -i inventory.yaml playbook.yaml \ + -e cifmw_bm_agent_reuse_vmedia=true +``` + +This run skips ISO generation, the podman HTTP server, and all VirtualMedia +eject/insert steps. It powers the host off, sets the UEFI one-time boot +override to the Virtual Optical Drive, powers the host back on, and waits +for `openshift-install agent wait-for install-complete`. 
+ +### What is skipped with `cifmw_bm_agent_reuse_vmedia: true` + +- Removing stale agent state from the previous run +- ISO generation (`openshift-install agent create image`) +- ISO patching for live debug +- HTTP server start and stop (podman) +- VirtualMedia eject before insert +- VirtualMedia ISO insert +- VirtualMedia eject after install + +### What still runs + +- USB boot BIOS check / enable +- Power-off (so the host boots cleanly from the mounted ISO) +- SSH key generation (idempotent, reuses existing key) +- `openshift-install` binary acquisition (skipped when binary already present) +- Config template generation (idempotent) +- LVMS MachineConfig generation (idempotent) +- UEFI VirtualMedia target discovery and one-time boot override +- Power-on and install wait +- kubeconfig copy + +**Prerequisite**: the `openshift-install` binary and the working directory +(`/artifacts/agent-install/`) from Run 1 must +still be present on disk. + ## openshift-install acquisition The `openshift-install` binary is obtained automatically via one of two diff --git a/roles/bm_sno/defaults/main.yml b/roles/bm_sno/defaults/main.yml index 271d58619..ab5437ce6 100644 --- a/roles/bm_sno/defaults/main.yml +++ b/roles/bm_sno/defaults/main.yml @@ -19,3 +19,11 @@ cifmw_bm_agent_disabled_ifaces: [] # size_mib: 0 # 0 = rest of disk # label: lvmstorage cifmw_bm_agent_lvms_partition: {} + +# Skip ISO generation, HTTP server, and VirtualMedia eject/insert when the +# agent ISO is already mounted in the iDRAC (e.g. via the iDRAC web UI using +# a local file). The playbook will go straight to setting the one-time boot +# override and waiting for the install to complete. +# The openshift-install binary and work directory from the previous run must +# still be present (they are not regenerated in this mode). 
+cifmw_bm_agent_reuse_vmedia: false diff --git a/roles/bm_sno/tasks/bm_discover_vmedia_target.yml b/roles/bm_sno/tasks/bm_discover_vmedia_target.yml index 9d2157368..54efbd2e5 100644 --- a/roles/bm_sno/tasks/bm_discover_vmedia_target.yml +++ b/roles/bm_sno/tasks/bm_discover_vmedia_target.yml @@ -187,6 +187,7 @@ register: _vmedia_check - name: Assert VirtualMedia ISO is mounted + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.assert: that: - _vmedia_check.json.Inserted | bool diff --git a/roles/bm_sno/tasks/main.yml b/roles/bm_sno/tasks/main.yml index a6c839ea4..05f71ef28 100644 --- a/roles/bm_sno/tasks/main.yml +++ b/roles/bm_sno/tasks/main.yml @@ -190,6 +190,7 @@ mode: "0644" - name: Remove stale agent state from previous runs + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.file: path: "{{ item }}" state: absent @@ -255,11 +256,14 @@ when: cifmw_bm_agent_disabled_ifaces | default([]) | length > 0 - name: Generate agent ISO + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.command: cmd: "{{ _work_dir }}/openshift-install agent create image --dir {{ _work_dir }}" - name: Patch agent ISO ignition for discovery-phase console access - when: cifmw_bm_agent_live_debug | bool + when: + - not cifmw_bm_agent_reuse_vmedia | bool + - cifmw_bm_agent_live_debug | bool ansible.builtin.include_tasks: bm_patch_agent_iso.yml - name: Set controller IP fact @@ -272,10 +276,12 @@ default(ansible_host))) }} - name: Show ISO URL that iDRAC will fetch + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.debug: msg: "ISO URL for iDRAC: http://{{ _controller_ip }}:{{ _iso_http_port }}/agent.x86_64.iso" - name: Stop any existing agent-iso-server container + when: not cifmw_bm_agent_reuse_vmedia | bool become: true ansible.builtin.command: cmd: podman rm -f agent-iso-server @@ -283,6 +289,7 @@ changed_when: false - name: Serve agent ISO via podman httpd + when: not cifmw_bm_agent_reuse_vmedia | bool become: true ansible.builtin.command: 
cmd: >- @@ -293,6 +300,7 @@ register: _httpd_start - name: Check agent-iso-server container is running + when: not cifmw_bm_agent_reuse_vmedia | bool become: true ansible.builtin.command: cmd: podman ps --filter name=agent-iso-server --format '{{ '{{' }}.Status{{ '}}' }}' @@ -300,10 +308,12 @@ changed_when: false - name: Show container status + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.debug: msg: "agent-iso-server status: {{ _httpd_status.stdout }}" - name: Wait for HTTP server to respond + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.uri: url: "http://{{ _controller_ip }}:{{ _iso_http_port }}/agent.x86_64.iso" method: HEAD @@ -313,12 +323,15 @@ until: _http_check.status == 200 - name: Discover VirtualMedia member URI + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.include_tasks: bm_discover_vmedia_member.yml - name: Eject any existing VirtualMedia before insert + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.include_tasks: bm_eject_vmedia.yml - name: Insert agent ISO via VirtualMedia + when: not cifmw_bm_agent_reuse_vmedia | bool no_log: true ansible.builtin.uri: url: "https://{{ _bmc_host }}{{ _vmedia_insert_action }}" @@ -408,9 +421,11 @@ mode: "0600" - name: Eject VirtualMedia after install + when: not cifmw_bm_agent_reuse_vmedia | bool ansible.builtin.include_tasks: bm_eject_vmedia.yml - name: Stop HTTP ISO server + when: not cifmw_bm_agent_reuse_vmedia | bool become: true ansible.builtin.command: cmd: podman rm -f agent-iso-server From b8d9ec5beeab978deefd89399c6976d93d39331f Mon Sep 17 00:00:00 2001 From: Bohdan Dobrelia Date: Wed, 17 Jun 2026 16:59:17 +0200 Subject: [PATCH 3/5] [bm_sno] partial support iDRAC10 Use the standard bm_discover_vmedia_target path for iDRAC9. Add a specific bm_discover_vmedia_target path for iDRAC10. 
Signed-off-by: Bohdan Dobrelia --- .../tasks/bm_discover_vmedia_target.yml | 50 +++++++---- .../bm_discover_vmedia_target_idrac10.yml | 88 +++++++++++++++++++ 2 files changed, 123 insertions(+), 15 deletions(-) create mode 100644 roles/bm_sno/tasks/bm_discover_vmedia_target_idrac10.yml diff --git a/roles/bm_sno/tasks/bm_discover_vmedia_target.yml b/roles/bm_sno/tasks/bm_discover_vmedia_target.yml index 54efbd2e5..bf9cc75f0 100644 --- a/roles/bm_sno/tasks/bm_discover_vmedia_target.yml +++ b/roles/bm_sno/tasks/bm_discover_vmedia_target.yml @@ -2,6 +2,11 @@ # Discover or validate the UEFI device path for the iDRAC Virtual Optical Drive, # clear any pending iDRAC config jobs, and set a one-time boot override. # Requires: _bmc_host, _bmc_creds, _redfish_headers +# +# Boot override strategy (selected by cifmw_bm_agent_bios_onetimeboot_fqdd): +# unset / empty — standard Redfish PATCH /Systems/System.Embedded.1 (iDRAC ≤ 9) +# set to FQDD — BIOS pending-settings PATCH /Bios/Settings (iDRAC 10+) +# see bm_discover_vmedia_target_idrac10.yml - name: Fetch UEFI boot option IDs no_log: true ansible.builtin.uri: @@ -45,8 +50,12 @@ map(attribute='DisplayName', default='?') | zip(_known_uefi_paths) | map('join', ' -> ') | list }} +# Skip UefiDevicePath validation when using the BIOS pending-settings approach +# (cifmw_bm_agent_bios_onetimeboot_fqdd set) — FQDD is validated by iDRAC itself. 
- name: Validate user-provided VirtualMedia UEFI path - when: cifmw_bm_agent_vmedia_uefi_path | length > 0 + when: + - cifmw_bm_agent_vmedia_uefi_path | length > 0 + - cifmw_bm_agent_bios_onetimeboot_fqdd | default('') | length == 0 ansible.builtin.assert: that: - cifmw_bm_agent_vmedia_uefi_path in _known_uefi_paths @@ -123,7 +132,9 @@ ansible.builtin.pause: seconds: 10 -- name: Set one-time boot from Virtual Optical Drive +# ── Standard one-time boot (iDRAC ≤ 9): PATCH Systems Boot property ────────── +- name: Set one-time boot — standard Redfish PATCH (iDRAC ≤ 9) + when: cifmw_bm_agent_bios_onetimeboot_fqdd | default('') | length == 0 no_log: true ansible.builtin.uri: url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1" @@ -141,7 +152,9 @@ force_basic_auth: true status_code: [200, 204] -- name: Verify boot override was applied +- name: Verify boot override — standard (iDRAC ≤ 9) + when: cifmw_bm_agent_bios_onetimeboot_fqdd | default('') | length == 0 + no_log: true ansible.builtin.uri: url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1" method: GET @@ -152,27 +165,34 @@ force_basic_auth: true return_content: true status_code: [200] - register: _boot_verify + register: _boot_verify_standard -- name: Assert boot override is set +- name: Assert boot override is set — standard (iDRAC ≤ 9) + when: cifmw_bm_agent_bios_onetimeboot_fqdd | default('') | length == 0 ansible.builtin.assert: that: - - _boot_verify.json.Boot.BootSourceOverrideTarget == 'UefiTarget' - - _boot_verify.json.Boot.BootSourceOverrideEnabled == 'Once' - - _boot_verify.json.Boot.UefiTargetBootSourceOverride | default('') | length > 0 + - _boot_verify_standard.json.Boot.BootSourceOverrideTarget == 'UefiTarget' + - _boot_verify_standard.json.Boot.BootSourceOverrideEnabled == 'Once' + - _boot_verify_standard.json.Boot.UefiTargetBootSourceOverride | default('') | length > 0 fail_msg: >- Boot override not applied. 
- Target: {{ _boot_verify.json.Boot.BootSourceOverrideTarget }}, - Enabled: {{ _boot_verify.json.Boot.BootSourceOverrideEnabled }}, - UefiPath: {{ _boot_verify.json.Boot.UefiTargetBootSourceOverride | default('empty') }} + Target: {{ _boot_verify_standard.json.Boot.BootSourceOverrideTarget }}, + Enabled: {{ _boot_verify_standard.json.Boot.BootSourceOverrideEnabled }}, + UefiPath: {{ _boot_verify_standard.json.Boot.UefiTargetBootSourceOverride | default('empty') }} -- name: Show resolved boot path +- name: Show resolved boot path — standard (iDRAC ≤ 9) + when: cifmw_bm_agent_bios_onetimeboot_fqdd | default('') | length == 0 ansible.builtin.debug: - msg: >- - Resolved boot path: - {{ _boot_verify.json.Boot.UefiTargetBootSourceOverride }} + msg: "Resolved boot path: {{ _boot_verify_standard.json.Boot.UefiTargetBootSourceOverride }}" + +# ── iDRAC 10+ one-time boot: BIOS pending-settings ─────────────────────────── +- name: Set one-time boot via BIOS pending settings (iDRAC 10+) + when: cifmw_bm_agent_bios_onetimeboot_fqdd | default('') | length > 0 + ansible.builtin.include_tasks: + file: bm_discover_vmedia_target_idrac10.yml - name: Verify VirtualMedia is still inserted + when: not cifmw_bm_agent_reuse_vmedia | bool no_log: true ansible.builtin.uri: url: "https://{{ _bmc_host }}{{ _vmedia_member_uri }}" diff --git a/roles/bm_sno/tasks/bm_discover_vmedia_target_idrac10.yml b/roles/bm_sno/tasks/bm_discover_vmedia_target_idrac10.yml new file mode 100644 index 000000000..429f6ac3e --- /dev/null +++ b/roles/bm_sno/tasks/bm_discover_vmedia_target_idrac10.yml @@ -0,0 +1,88 @@ +--- +# One-time boot via BIOS pending-settings for iDRAC 10+. +# +# iDRAC 10 (firmware 1.30.x+) rejects PATCH /Systems/System.Embedded.1 for +# boot override with "Invalid System id: System.Embedded.1". +# The working approach patches BIOS pending-settings instead: +# OneTimeBootMode: OneTimeUefiBootSeq (applied on next reboot) +# OneTimeUefiBootSeqDev: (e.g. 
GenericUSB.Placeholder.1-1) +# +# Idempotency: SYS011 ("Pending configuration values are already committed") +# is treated as success — values from a prior run are already pending and will +# apply on the next reboot, which is exactly the desired state. +# +# Triggered by: cifmw_bm_agent_bios_onetimeboot_fqdd != '' +# Requires: _bmc_host, _bmc_creds, _redfish_headers +- name: Set one-time boot — BIOS pending settings (iDRAC 10+) + no_log: true + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1/Bios/Settings" + method: PATCH + headers: "{{ _redfish_headers }}" + body_format: json + body: + Attributes: + OneTimeBootMode: OneTimeUefiBootSeq + OneTimeUefiBootSeqDev: "{{ cifmw_bm_agent_bios_onetimeboot_fqdd }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200, 204, 400] + register: _bios_settings_patch + # SYS011: values already committed from a prior run, pending reboot — idempotent. + failed_when: + - _bios_settings_patch.status == 400 + - >- + _bios_settings_patch.json.error['@Message.ExtendedInfo'] + | selectattr('MessageId', 'equalto', 'IDRAC.2.16.SYS011') + | list | length == 0 + +- name: Report BIOS pending settings already committed (idempotent) + when: + - _bios_settings_patch.status == 400 + - >- + _bios_settings_patch.json.error['@Message.ExtendedInfo'] + | selectattr('MessageId', 'equalto', 'IDRAC.2.16.SYS011') + | list | length > 0 + ansible.builtin.debug: + msg: >- + BIOS one-time boot already pending (SYS011 — values committed from a + previous run, awaiting reboot). + OneTimeUefiBootSeqDev={{ cifmw_bm_agent_bios_onetimeboot_fqdd }} + +# When SYS011 was returned the values are already in a committed job — iDRAC +# removes them from the Bios/Settings pending view, so GET would return an +# empty Attributes dict. Trust the prior run; skip verify/assert. 
+- name: Verify boot override — BIOS pending settings (iDRAC 10+) + when: _bios_settings_patch.status in [200, 204] + no_log: true + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1/Bios/Settings" + method: GET + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + return_content: true + status_code: [200] + register: _boot_verify_idrac10 + +- name: Assert boot override is set — BIOS pending settings (iDRAC 10+) + when: _bios_settings_patch.status in [200, 204] + ansible.builtin.assert: + that: + - _boot_verify_idrac10.json.Attributes.OneTimeBootMode == 'OneTimeUefiBootSeq' + - _boot_verify_idrac10.json.Attributes.OneTimeUefiBootSeqDev == cifmw_bm_agent_bios_onetimeboot_fqdd + fail_msg: >- + BIOS pending boot override not applied. + OneTimeBootMode: {{ _boot_verify_idrac10.json.Attributes.OneTimeBootMode | default('empty') }}, + OneTimeUefiBootSeqDev: {{ _boot_verify_idrac10.json.Attributes.OneTimeUefiBootSeqDev | default('empty') }} + +- name: Show resolved boot path — BIOS pending settings (iDRAC 10+) + ansible.builtin.debug: + msg: >- + One-time boot set via BIOS pending settings: + OneTimeUefiBootSeqDev={{ cifmw_bm_agent_bios_onetimeboot_fqdd }} + (applies on next reboot) From 06310aa0c6e70421dc38ec34cce7b50eef1dabb2 Mon Sep 17 00:00:00 2001 From: Bohdan Dobrelia Date: Thu, 18 Jun 2026 19:32:29 +0200 Subject: [PATCH 4/5] [bm_sno] add more config options for agent Support tagged interface config for agent Support additional NTP servers Signed-off-by: Bohdan Dobrelia --- roles/bm_sno/README.md | 24 ++++++++++++++++++++- roles/bm_sno/tasks/main.yml | 1 + roles/bm_sno/templates/agent_config.yaml.j2 | 19 ++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/roles/bm_sno/README.md b/roles/bm_sno/README.md index 496007290..509912588 100644 --- a/roles/bm_sno/README.md +++ b/roles/bm_sno/README.md 
@@ -65,6 +65,8 @@ provision IP via `/etc/hosts` entries managed by the role. | `cifmw_bm_agent_lvms_partition` | dict | `{}` | When set, creates an Ignition partition at install time to cap CoreOS rootfs growth and leave unallocated space for the LVMS StorageClass. Keys: `device` (required, e.g. `/dev/nvme0n1`), `rootfs_mib` (default `150000`), `size_mib` (default `0` = rest of disk), `label` (default `lvmstorage`). See [LVMS partition](#lvms-partition). | | `cifmw_bm_agent_reuse_vmedia` | bool | `false` | Skip ISO generation, HTTP server start/stop, and VirtualMedia eject/insert when the agent ISO is already mounted in the iDRAC (e.g. via the iDRAC web UI using a local file). When `true` the role goes straight to setting the one-time boot override and waiting for install. The `openshift-install` binary and working directory from the previous run must still be present on disk. | | `cifmw_bm_agent_iso_server_ip` | str | `""` | IP address the iDRAC uses to fetch the agent ISO. When empty, the role auto-detects the controller's IP from nodepool metadata or `ansible_default_ipv4.address`. Set this when the auto-detected IP is not reachable by the iDRAC — for example, when running over VPN where the VPN interface IP must be used instead of the default-route IP. | +| `cifmw_bm_agent_node_vlan` | int | `0` | 802.1Q VLAN ID for the machine network. When non-zero, the generated `agent-config.yaml` creates a VLAN sub-interface (`<iface>.<vlan_id>`) on top of `cifmw_bm_agent_node_iface` and assigns the node IP there instead of the bare physical NIC. Set to `0` (default) when the machine-network VLAN arrives untagged (native) at the NIC. | +| `cifmw_bm_agent_additional_ntp_sources` | list | `[]` | NTP server hostnames or IPs added to `additionalNTPSources` in `agent-config.yaml`. These are baked into the agent ISO so `chronyd` can synchronize on first boot even in restricted networks. 
Without this, the Assisted Installer validation may reject the host with *"Host couldn't synchronize with any NTP server"* (see [KCS 7020898](https://access.redhat.com/solutions/7020898)). Example: `["clock.redhat.com"]`. | ## Secrets management @@ -242,7 +244,7 @@ Test coverage: Minimal vars.yaml for a bare metal SNO deployment: -```YAML +```yaml cifmw_bm_sno: true cifmw_bm_agent_cluster_name: ocp cifmw_bm_agent_base_domain: example.com @@ -258,6 +260,26 @@ cifmw_bm_nodes: root_device: /dev/sda ``` +With a tagged machine-network VLAN and NTP sources (restricted network): + +```yaml +cifmw_bm_sno: true +cifmw_bm_agent_cluster_name: sno +cifmw_bm_agent_base_domain: lab.example.local +cifmw_bm_agent_machine_network: "x.x.x.0/24" +cifmw_bm_agent_node_ip: "x.x.x.101" +cifmw_bm_agent_node_iface: eno17395np0 # physical NIC; VLAN sub-iface created automatically +cifmw_bm_agent_node_vlan: 1073 # machine network arrives 802.1Q-tagged +cifmw_bm_agent_additional_ntp_sources: + - clock.redhat.com +cifmw_bm_agent_bmc_host: x.x.x.151 +cifmw_bm_agent_openshift_version: "4.18.3" + +cifmw_bm_nodes: + - mac: "D4:04:E6:F8:41:50" + root_device: /dev/nvme1n1 +``` + ## Local debugging on an autoheld Zuul node When a Zuul job is held (`autohold`), you can SSH into the Zuul controller diff --git a/roles/bm_sno/tasks/main.yml b/roles/bm_sno/tasks/main.yml index 05f71ef28..7a9e370e0 100644 --- a/roles/bm_sno/tasks/main.yml +++ b/roles/bm_sno/tasks/main.yml @@ -57,6 +57,7 @@ _machine_network: "{{ cifmw_bm_agent_machine_network }}" _node_ip: "{{ cifmw_bm_agent_node_ip }}" _node_iface: "{{ cifmw_bm_agent_node_iface }}" + _node_vlan: "{{ cifmw_bm_agent_node_vlan | default(0) | int }}" _node_mac: "{{ cifmw_bm_nodes[0].mac }}" _bmc_host: "{{ cifmw_bm_agent_bmc_host }}" _iso_http_port: "{{ cifmw_bm_agent_iso_http_port }}" diff --git a/roles/bm_sno/templates/agent_config.yaml.j2 b/roles/bm_sno/templates/agent_config.yaml.j2 index 02f23bf12..dee356512 100644 --- 
a/roles/bm_sno/templates/agent_config.yaml.j2 +++ b/roles/bm_sno/templates/agent_config.yaml.j2 @@ -2,6 +2,12 @@ apiVersion: v1alpha1 metadata: name: {{ _cluster_name }} rendezvousIP: {{ _node_ip }} +{% if cifmw_bm_agent_additional_ntp_sources | default([]) | length > 0 %} +additionalNTPSources: +{% for src in cifmw_bm_agent_additional_ntp_sources %} + - {{ src }} +{% endfor %} +{% endif %} hosts: - hostname: {{ _cluster_name }}-master-0 role: master @@ -13,11 +19,24 @@ hosts: - name: {{ _node_iface }} type: ethernet state: up + ipv4: + enabled: {{ 'false' if _node_vlan | int > 0 else 'true' }} + dhcp: {{ 'false' if _node_vlan | int > 0 else 'true' }} + ipv6: + enabled: false +{% if _node_vlan | int > 0 %} + - name: {{ _node_iface }}.{{ _node_vlan }} + type: vlan + state: up + vlan: + base-iface: {{ _node_iface }} + id: {{ _node_vlan }} ipv4: enabled: true dhcp: true ipv6: enabled: false +{% endif %} {% for iface in cifmw_bm_agent_disabled_ifaces | default([]) %} - name: {{ iface }} type: ethernet From bc03df6af44ab348c565207ba6b2bf347b4709a4 Mon Sep 17 00:00:00 2001 From: Bohdan Dobrelia Date: Fri, 19 Jun 2026 10:20:20 +0200 Subject: [PATCH 5/5] [bm_sno] document nuances for ssh access Signed-off-by: Bohdan Dobrelia --- roles/bm_sno/README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/roles/bm_sno/README.md b/roles/bm_sno/README.md index 509912588..094e3bc6d 100644 --- a/roles/bm_sno/README.md +++ b/roles/bm_sno/README.md @@ -354,10 +354,15 @@ oc get nodes For ssh access into SNO host: ```bash -ssh -i ~/ci-framework-data/artifacts/agent-install/agent_ssh_key \ +ssh -o IdentitiesOnly=yes \ + -i ~/ci-framework-data/artifacts/agent-install/agent_ssh_key \ core@<cluster_name>.<base_domain> ``` +`-o IdentitiesOnly=yes` is required when the local ssh-agent holds many keys — +the server's `MaxAuthTries` limit (default 6) is hit before the explicit key is +tried, resulting in *"Too many authentication failures"*. 
+ Replace `<cluster_name>` with the value of `cifmw_bm_agent_cluster_name` (e.g. `sno`).