From 728e8f2f0f671a07d57ed23cc763d7c5bd014895 Mon Sep 17 00:00:00 2001 From: Srikar Date: Tue, 19 May 2026 03:39:48 -0700 Subject: [PATCH 1/2] feat: add a force_direct flag to the component management API that allows clients to bypass the state controller and directly dispatch power and firmware control to the component backend (RMS, NSM, PSM etc) --- .../component_manager/update_firmware/args.rs | 22 +++++++++++++ crates/api/src/handlers/component_manager.rs | 32 +++++++++++++++++-- crates/rpc/proto/forge.proto | 7 ++++ 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/crates/admin-cli/src/component_manager/update_firmware/args.rs b/crates/admin-cli/src/component_manager/update_firmware/args.rs index 70a6482f81..6a3cbf9454 100644 --- a/crates/admin-cli/src/component_manager/update_firmware/args.rs +++ b/crates/admin-cli/src/component_manager/update_firmware/args.rs @@ -58,6 +58,12 @@ pub struct SwitchArgs { help = "NVLink switch components to update; omit to update all supported components" )] pub components: Vec, + + #[clap( + long = "force-direct", + help = "Bypass the state controller and dispatch directly to the component backend" + )] + pub force_direct: bool, } #[derive(ClapArgs, Debug)] @@ -75,6 +81,12 @@ pub struct PowerShelfArgs { help = "Power shelf components to update; omit to update all supported components" )] pub components: Vec, + + #[clap( + long = "force-direct", + help = "Bypass the state controller and dispatch directly to the component backend" + )] + pub force_direct: bool, } #[derive(ClapArgs, Debug)] @@ -92,6 +104,12 @@ pub struct ComputeTrayArgs { help = "Compute tray components to update; omit to update all supported components" )] pub components: Vec, + + #[clap( + long = "force-direct", + help = "Bypass the state controller and dispatch directly to the component backend" + )] + pub force_direct: bool, } #[derive(ClapArgs, Debug)] @@ -108,6 +126,7 @@ impl From for rpc::forge::UpdateComponentFirmwareRequest { match
args.target { Target::Switch(target) => Self { target_version: target.target_version, + force_direct: target.force_direct, target: Some( rpc::forge::update_component_firmware_request::Target::Switches( rpc::forge::UpdateSwitchFirmwareTarget { @@ -125,6 +144,7 @@ impl From for rpc::forge::UpdateComponentFirmwareRequest { }, Target::PowerShelf(target) => Self { target_version: target.target_version, + force_direct: target.force_direct, target: Some( rpc::forge::update_component_firmware_request::Target::PowerShelves( rpc::forge::UpdatePowerShelfFirmwareTarget { @@ -142,6 +162,7 @@ impl From for rpc::forge::UpdateComponentFirmwareRequest { }, Target::ComputeTray(target) => Self { target_version: target.target_version, + force_direct: target.force_direct, target: Some( rpc::forge::update_component_firmware_request::Target::ComputeTrays( rpc::forge::UpdateComputeTrayFirmwareTarget { @@ -159,6 +180,7 @@ impl From for rpc::forge::UpdateComponentFirmwareRequest { }, Target::Rack(target) => Self { target_version: target.target_version, + force_direct: false, target: Some( rpc::forge::update_component_firmware_request::Target::Racks( rpc::forge::UpdateRackFirmwareTarget { diff --git a/crates/api/src/handlers/component_manager.rs b/crates/api/src/handlers/component_manager.rs index 48cac3ebf4..3b3bfa9e37 100644 --- a/crates/api/src/handlers/component_manager.rs +++ b/crates/api/src/handlers/component_manager.rs @@ -873,6 +873,7 @@ pub(crate) async fn component_power_control( let req = request.into_inner(); let action = map_power_action(req.action)?; + let force_direct = req.force_direct; let target = req .target @@ -880,6 +881,12 @@ pub(crate) async fn component_power_control( let (results, exploration_ips) = match target { rpc::component_power_control_request::Target::SwitchIds(list) => { + if cm.nv_switch_use_state_controller && !force_direct { + // TODO: implement state controller path for switch power control + return Err(Status::unimplemented( + "switch power control 
through the state controller is not yet supported", + )); + } let endpoints = resolve_switch_endpoints(api, &list.ids).await?; let mut results: Vec<_> = endpoints @@ -918,6 +925,12 @@ pub(crate) async fn component_power_control( (results, ips) } rpc::component_power_control_request::Target::PowerShelfIds(list) => { + if cm.power_shelf_use_state_controller && !force_direct { + // TODO: implement state controller path for power shelf power control + return Err(Status::unimplemented( + "power shelf power control through the state controller is not yet supported", + )); + } let endpoints = resolve_power_shelf_endpoints(api, &list.ids).await?; let mut results: Vec<_> = endpoints @@ -956,7 +969,7 @@ pub(crate) async fn component_power_control( (results, ips) } rpc::component_power_control_request::Target::MachineIds(list) => { - if cm.compute_tray_use_state_controller { + if cm.compute_tray_use_state_controller && !force_direct { // TODO: implement state controller path for compute tray power control return Err(Status::unimplemented( "compute tray power control through the state controller is not yet supported", @@ -1280,6 +1293,7 @@ pub(crate) async fn update_component_firmware( ) -> Result, Status> { log_request_data(&request); let req = request.into_inner(); + let force_direct = req.force_direct; let target = req .target @@ -1302,7 +1316,7 @@ pub(crate) async fn update_component_firmware( return Err(Status::invalid_argument("switch_ids must not be empty")); } - if cm.nv_switch_use_state_controller { + if cm.nv_switch_use_state_controller && !force_direct { component_names = map_nv_switch_component_names(&t.components)?; let mut txn = @@ -1367,7 +1381,7 @@ pub(crate) async fn update_component_firmware( return Err(Status::invalid_argument("machine_ids must not be empty")); } - if cm.compute_tray_use_state_controller { + if cm.compute_tray_use_state_controller && !force_direct { component_names = map_compute_tray_component_names(&t.components)?; let machine = 
db::machine::find_one( @@ -1422,6 +1436,12 @@ pub(crate) async fn update_component_firmware( } rpc::update_component_firmware_request::Target::PowerShelves(t) => { let cm = require_component_manager(api)?; + if cm.power_shelf_use_state_controller && !force_direct { + // TODO: implement state controller path for power shelf firmware updates + return Err(Status::unimplemented( + "power shelf firmware updates through the state controller are not yet supported", + )); + } let list = t .power_shelf_ids .ok_or_else(|| Status::invalid_argument("power_shelf_ids is required"))?; @@ -1454,6 +1474,12 @@ pub(crate) async fn update_component_firmware( power_shelf_results = Some(results); } rpc::update_component_firmware_request::Target::Racks(t) => { + if force_direct { + // TODO: implement RMS backend direct dispatch for a full rack + return Err(Status::invalid_argument( + "force_direct is not supported for rack-level firmware updates", + )); + } let list = t .rack_ids .ok_or_else(|| Status::invalid_argument("rack_ids is required"))?; diff --git a/crates/rpc/proto/forge.proto b/crates/rpc/proto/forge.proto index 1491c20280..d997bf7fd6 100644 --- a/crates/rpc/proto/forge.proto +++ b/crates/rpc/proto/forge.proto @@ -8037,6 +8037,9 @@ message ComponentPowerControlRequest { PowerShelfIdList power_shelf_ids = 3; } common.SystemPowerControl action = 4; + // When true, bypass the state controller and dispatch directly to the + // configured HAL backend (NSM, PSM, Redfish, etc.). + bool force_direct = 5; } message ComponentPowerControlResponse { @@ -8119,6 +8122,10 @@ message UpdateComponentFirmwareRequest { UpdateRackFirmwareTarget racks = 5; } string target_version = 4; + // When true, bypass the state controller and dispatch directly to the + // configured HAL backend (NSM, PSM, Redfish, etc.). + // Not supported for rack-level firmware updates. 
+ bool force_direct = 6; } message UpdateComponentFirmwareResponse { From 27da8ad5cd0162e56b096c46d0882ec1b75d35f0 Mon Sep 17 00:00:00 2001 From: Kun Zhao Date: Fri, 22 May 2026 16:26:16 -0700 Subject: [PATCH 2/2] refactor: rename force_direct to bypass_state_controller Signed-off-by: Kun Zhao --- .../component_manager/update_firmware/args.rs | 20 +++++++++---------- crates/api/src/handlers/component_manager.rs | 20 +++++++++---------- crates/rpc/proto/forge.proto | 9 ++++----- 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/crates/admin-cli/src/component_manager/update_firmware/args.rs b/crates/admin-cli/src/component_manager/update_firmware/args.rs index 6a3cbf9454..fb1e418813 100644 --- a/crates/admin-cli/src/component_manager/update_firmware/args.rs +++ b/crates/admin-cli/src/component_manager/update_firmware/args.rs @@ -60,10 +60,10 @@ pub struct SwitchArgs { pub components: Vec, #[clap( - long = "force-direct", + long = "bypass-state-controller", help = "Bypass the state controller and dispatch directly to the component backend" )] - pub force_direct: bool, + pub bypass_state_controller: bool, } #[derive(ClapArgs, Debug)] @@ -83,10 +83,10 @@ pub struct PowerShelfArgs { pub components: Vec, #[clap( - long = "force-direct", + long = "bypass-state-controller", help = "Bypass the state controller and dispatch directly to the component backend" )] - pub force_direct: bool, + pub bypass_state_controller: bool, } #[derive(ClapArgs, Debug)] @@ -106,10 +106,10 @@ pub struct ComputeTrayArgs { pub components: Vec, #[clap( - long = "force-direct", + long = "bypass-state-controller", help = "Bypass the state controller and dispatch directly to the component backend" )] - pub force_direct: bool, + pub bypass_state_controller: bool, } #[derive(ClapArgs, Debug)] @@ -126,7 +126,7 @@ impl From for rpc::forge::UpdateComponentFirmwareRequest { match args.target { Target::Switch(target) => Self { target_version: target.target_version, - force_direct: 
target.force_direct, + bypass_state_controller: target.bypass_state_controller, target: Some( rpc::forge::update_component_firmware_request::Target::Switches( rpc::forge::UpdateSwitchFirmwareTarget { @@ -144,7 +144,7 @@ impl From for rpc::forge::UpdateComponentFirmwareRequest { }, Target::PowerShelf(target) => Self { target_version: target.target_version, - force_direct: target.force_direct, + bypass_state_controller: target.bypass_state_controller, target: Some( rpc::forge::update_component_firmware_request::Target::PowerShelves( rpc::forge::UpdatePowerShelfFirmwareTarget { @@ -162,7 +162,7 @@ impl From for rpc::forge::UpdateComponentFirmwareRequest { }, Target::ComputeTray(target) => Self { target_version: target.target_version, - force_direct: target.force_direct, + bypass_state_controller: target.bypass_state_controller, target: Some( rpc::forge::update_component_firmware_request::Target::ComputeTrays( rpc::forge::UpdateComputeTrayFirmwareTarget { @@ -180,7 +180,7 @@ impl From for rpc::forge::UpdateComponentFirmwareRequest { }, Target::Rack(target) => Self { target_version: target.target_version, - force_direct: false, + bypass_state_controller: false, target: Some( rpc::forge::update_component_firmware_request::Target::Racks( rpc::forge::UpdateRackFirmwareTarget { diff --git a/crates/api/src/handlers/component_manager.rs b/crates/api/src/handlers/component_manager.rs index 3b3bfa9e37..ee2b0e97f0 100644 --- a/crates/api/src/handlers/component_manager.rs +++ b/crates/api/src/handlers/component_manager.rs @@ -873,7 +873,7 @@ pub(crate) async fn component_power_control( let req = request.into_inner(); let action = map_power_action(req.action)?; - let force_direct = req.force_direct; + let bypass_state_controller = req.bypass_state_controller; let target = req .target @@ -881,7 +881,7 @@ pub(crate) async fn component_power_control( let (results, exploration_ips) = match target { rpc::component_power_control_request::Target::SwitchIds(list) => { - if 
cm.nv_switch_use_state_controller && !force_direct { + if cm.nv_switch_use_state_controller && !bypass_state_controller { // TODO: implement state controller path for switch power control return Err(Status::unimplemented( "switch power control through the state controller is not yet supported", @@ -925,7 +925,7 @@ pub(crate) async fn component_power_control( (results, ips) } rpc::component_power_control_request::Target::PowerShelfIds(list) => { - if cm.power_shelf_use_state_controller && !force_direct { + if cm.power_shelf_use_state_controller && !bypass_state_controller { // TODO: implement state controller path for power shelf power control return Err(Status::unimplemented( "power shelf power control through the state controller is not yet supported", @@ -969,7 +969,7 @@ pub(crate) async fn component_power_control( (results, ips) } rpc::component_power_control_request::Target::MachineIds(list) => { - if cm.compute_tray_use_state_controller && !force_direct { + if cm.compute_tray_use_state_controller && !bypass_state_controller { // TODO: implement state controller path for compute tray power control return Err(Status::unimplemented( "compute tray power control through the state controller is not yet supported", @@ -1293,7 +1293,7 @@ pub(crate) async fn update_component_firmware( ) -> Result, Status> { log_request_data(&request); let req = request.into_inner(); - let force_direct = req.force_direct; + let bypass_state_controller = req.bypass_state_controller; let target = req .target @@ -1316,7 +1316,7 @@ pub(crate) async fn update_component_firmware( return Err(Status::invalid_argument("switch_ids must not be empty")); } - if cm.nv_switch_use_state_controller && !force_direct { + if cm.nv_switch_use_state_controller && !bypass_state_controller { component_names = map_nv_switch_component_names(&t.components)?; let mut txn = @@ -1381,7 +1381,7 @@ pub(crate) async fn update_component_firmware( return Err(Status::invalid_argument("machine_ids must not be empty")); } 
- if cm.compute_tray_use_state_controller && !force_direct { + if cm.compute_tray_use_state_controller && !bypass_state_controller { component_names = map_compute_tray_component_names(&t.components)?; let machine = db::machine::find_one( @@ -1436,7 +1436,7 @@ pub(crate) async fn update_component_firmware( } rpc::update_component_firmware_request::Target::PowerShelves(t) => { let cm = require_component_manager(api)?; - if cm.power_shelf_use_state_controller && !force_direct { + if cm.power_shelf_use_state_controller && !bypass_state_controller { // TODO: implement state controller path for power shelf firmware updates return Err(Status::unimplemented( "power shelf firmware updates through the state controller are not yet supported", @@ -1474,10 +1474,10 @@ pub(crate) async fn update_component_firmware( power_shelf_results = Some(results); } rpc::update_component_firmware_request::Target::Racks(t) => { - if force_direct { + if bypass_state_controller { // TODO: implement RMS backend direct dispatch for a full rack return Err(Status::invalid_argument( - "force_direct is not supported for rack-level firmware updates", + "bypass_state_controller is not supported for rack-level firmware updates", )); } let list = t diff --git a/crates/rpc/proto/forge.proto b/crates/rpc/proto/forge.proto index d997bf7fd6..096af07112 100644 --- a/crates/rpc/proto/forge.proto +++ b/crates/rpc/proto/forge.proto @@ -8038,8 +8038,8 @@ message ComponentPowerControlRequest { } common.SystemPowerControl action = 4; // When true, bypass the state controller and dispatch directly to the - // configured HAL backend (NSM, PSM, Redfish, etc.). - bool force_direct = 5; + // component backend. + bool bypass_state_controller = 5; } message ComponentPowerControlResponse { @@ -8123,9 +8123,8 @@ message UpdateComponentFirmwareRequest { } string target_version = 4; // When true, bypass the state controller and dispatch directly to the - // configured HAL backend (NSM, PSM, Redfish, etc.). 
- // Not supported for rack-level firmware updates. - bool force_direct = 6; + // component backend. + bool bypass_state_controller = 6; } message UpdateComponentFirmwareResponse {