Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions crates/admin-cli/src/component_manager/update_firmware/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ pub struct SwitchArgs {
help = "NVLink switch components to update; omit to update all supported components"
)]
pub components: Vec<NvSwitchComponentArg>,

#[clap(
long = "bypass-state-controller",
help = "Bypass the state controller and dispatch directly to the component backend"
)]
pub bypass_state_controller: bool,
}

#[derive(ClapArgs, Debug)]
Expand All @@ -75,6 +81,12 @@ pub struct PowerShelfArgs {
help = "Power shelf components to update; omit to update all supported components"
)]
pub components: Vec<PowerShelfComponentArg>,

#[clap(
long = "bypass-state-controller",
help = "Bypass the state controller and dispatch directly to the component backend"
)]
pub bypass_state_controller: bool,
}

#[derive(ClapArgs, Debug)]
Expand All @@ -92,6 +104,12 @@ pub struct ComputeTrayArgs {
help = "Compute tray components to update; omit to update all supported components"
)]
pub components: Vec<ComputeTrayComponentArg>,

#[clap(
long = "bypass-state-controller",
help = "Bypass the state controller and dispatch directly to the component backend"
)]
pub bypass_state_controller: bool,
}

#[derive(ClapArgs, Debug)]
Expand All @@ -108,6 +126,7 @@ impl From<Args> for rpc::forge::UpdateComponentFirmwareRequest {
match args.target {
Target::Switch(target) => Self {
target_version: target.target_version,
bypass_state_controller: target.bypass_state_controller,
target: Some(
rpc::forge::update_component_firmware_request::Target::Switches(
rpc::forge::UpdateSwitchFirmwareTarget {
Expand All @@ -125,6 +144,7 @@ impl From<Args> for rpc::forge::UpdateComponentFirmwareRequest {
},
Target::PowerShelf(target) => Self {
target_version: target.target_version,
bypass_state_controller: target.bypass_state_controller,
target: Some(
rpc::forge::update_component_firmware_request::Target::PowerShelves(
rpc::forge::UpdatePowerShelfFirmwareTarget {
Expand All @@ -142,6 +162,7 @@ impl From<Args> for rpc::forge::UpdateComponentFirmwareRequest {
},
Target::ComputeTray(target) => Self {
target_version: target.target_version,
bypass_state_controller: target.bypass_state_controller,
target: Some(
rpc::forge::update_component_firmware_request::Target::ComputeTrays(
rpc::forge::UpdateComputeTrayFirmwareTarget {
Expand All @@ -159,6 +180,7 @@ impl From<Args> for rpc::forge::UpdateComponentFirmwareRequest {
},
Target::Rack(target) => Self {
target_version: target.target_version,
bypass_state_controller: false,
target: Some(
rpc::forge::update_component_firmware_request::Target::Racks(
rpc::forge::UpdateRackFirmwareTarget {
Expand Down
32 changes: 29 additions & 3 deletions crates/api/src/handlers/component_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -873,13 +873,20 @@ pub(crate) async fn component_power_control(
let req = request.into_inner();

let action = map_power_action(req.action)?;
let bypass_state_controller = req.bypass_state_controller;

let target = req
.target
.ok_or_else(|| Status::invalid_argument("target is required"))?;

let (results, exploration_ips) = match target {
rpc::component_power_control_request::Target::SwitchIds(list) => {
if cm.nv_switch_use_state_controller && !bypass_state_controller {
// TODO: implement state controller path for switch power control
return Err(Status::unimplemented(
"switch power control through the state controller is not yet supported",
));
}
let endpoints = resolve_switch_endpoints(api, &list.ids).await?;

let mut results: Vec<_> = endpoints
Expand Down Expand Up @@ -918,6 +925,12 @@ pub(crate) async fn component_power_control(
(results, ips)
}
rpc::component_power_control_request::Target::PowerShelfIds(list) => {
if cm.power_shelf_use_state_controller && !bypass_state_controller {
// TODO: implement state controller path for power shelf power control
return Err(Status::unimplemented(
"power shelf power control through the state controller is not yet supported",
));
}
let endpoints = resolve_power_shelf_endpoints(api, &list.ids).await?;

let mut results: Vec<_> = endpoints
Expand Down Expand Up @@ -956,7 +969,7 @@ pub(crate) async fn component_power_control(
(results, ips)
}
rpc::component_power_control_request::Target::MachineIds(list) => {
if cm.compute_tray_use_state_controller {
if cm.compute_tray_use_state_controller && !bypass_state_controller {
// TODO: implement state controller path for compute tray power control
return Err(Status::unimplemented(
"compute tray power control through the state controller is not yet supported",
Expand Down Expand Up @@ -1280,6 +1293,7 @@ pub(crate) async fn update_component_firmware(
) -> Result<Response<rpc::UpdateComponentFirmwareResponse>, Status> {
log_request_data(&request);
let req = request.into_inner();
let bypass_state_controller = req.bypass_state_controller;

let target = req
.target
Expand All @@ -1302,7 +1316,7 @@ pub(crate) async fn update_component_firmware(
return Err(Status::invalid_argument("switch_ids must not be empty"));
}

if cm.nv_switch_use_state_controller {
if cm.nv_switch_use_state_controller && !bypass_state_controller {
component_names = map_nv_switch_component_names(&t.components)?;

let mut txn =
Expand Down Expand Up @@ -1367,7 +1381,7 @@ pub(crate) async fn update_component_firmware(
return Err(Status::invalid_argument("machine_ids must not be empty"));
}

if cm.compute_tray_use_state_controller {
if cm.compute_tray_use_state_controller && !bypass_state_controller {
component_names = map_compute_tray_component_names(&t.components)?;

let machine = db::machine::find_one(
Expand Down Expand Up @@ -1422,6 +1436,12 @@ pub(crate) async fn update_component_firmware(
}
rpc::update_component_firmware_request::Target::PowerShelves(t) => {
let cm = require_component_manager(api)?;
if cm.power_shelf_use_state_controller && !bypass_state_controller {
// TODO: implement state controller path for power shelf firmware updates
return Err(Status::unimplemented(
"power shelf firmware updates through the state controller are not yet supported",
));
}
let list = t
.power_shelf_ids
.ok_or_else(|| Status::invalid_argument("power_shelf_ids is required"))?;
Expand Down Expand Up @@ -1454,6 +1474,12 @@ pub(crate) async fn update_component_firmware(
power_shelf_results = Some(results);
}
rpc::update_component_firmware_request::Target::Racks(t) => {
if bypass_state_controller {
// TODO: implement RMS backend direct dispatch for a full rack
return Err(Status::invalid_argument(
"bypass_state_controller is not supported for rack-level firmware updates",
));
}
let list = t
.rack_ids
.ok_or_else(|| Status::invalid_argument("rack_ids is required"))?;
Expand Down
6 changes: 6 additions & 0 deletions crates/rpc/proto/forge.proto
Original file line number Diff line number Diff line change
Expand Up @@ -8063,6 +8063,9 @@ message ComponentPowerControlRequest {
PowerShelfIdList power_shelf_ids = 3;
}
common.SystemPowerControl action = 4;
// When true, bypass the state controller and dispatch directly to the
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @shayan1995 for visibility on proto changes in #1770

// component backend.
bool bypass_state_controller = 5;
}

message ComponentPowerControlResponse {
Expand Down Expand Up @@ -8145,6 +8148,9 @@ message UpdateComponentFirmwareRequest {
UpdateRackFirmwareTarget racks = 5;
}
string target_version = 4;
// When true, bypass the state controller and dispatch directly to the
// component backend. Rejected with INVALID_ARGUMENT for rack targets, which
// have no direct-dispatch path yet.
bool bypass_state_controller = 6;
}

message UpdateComponentFirmwareResponse {
Expand Down
Loading