diff --git a/CHANGELOG.md b/CHANGELOG.md index b8c9ec0b80a..8f3b96199f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,10 +23,18 @@ Release channels have their own copy of this changelog: `getLatestBlockhash` response together with its context (notably `context.slot`). ### Validator #### Breaking +* XDP transmit is now enabled by default on Linux in copy mode on an auto-selected CPU + separate from the PoH pinned CPU core. Use `--xdp-cpu-cores` to override the XDP + CPU assignment; configured XDP cores must not include the PoH pinned CPU core. + Use `--xdp-zero-copy` with `--xdp-cpu-cores` to opt in to zero copy. When using + zero copy with a bonded interface, pass `--xdp-interface` with the underlying member + interface. Default validator startup now requires the XDP copy-mode capabilities (`CAP_NET_RAW` and `CAP_NET_ADMIN`); pass `--no-xdp` to disable XDP transmit. +* The default PoH pinned CPU core is now CPU core 10. Use `--poh-pinned-cpu-core` to override it. #### Deprecations * `--accounts-db-access-storages-method` is now deprecated and a no-op (the `mmap` value was deprecated in v4.0.0; mmap mode has now been removed entirely). The flag is still accepted for backward compatibility, but account storages are always accessed via file I/O. +* `--experimental-poh-pinned-cpu-core` is now deprecated. Use `--poh-pinned-cpu-core` instead. #### Changes * Turbine shred ingestion now rejects shreds more than half an epoch in the future (previously up to 2 full epochs ahead was accepted). ### CLI diff --git a/docs/src/operations/running-with-af-xdp.md b/docs/src/operations/running-with-af-xdp.md index 94b1f57b4a2..8f35cad3be3 100644 --- a/docs/src/operations/running-with-af-xdp.md +++ b/docs/src/operations/running-with-af-xdp.md @@ -15,27 +15,33 @@ Before rolling out XDP on a production validator, you should test it on your set * **Performance Gain:** Confirm that performance is improved with the new configuration (e.g. lower CPU usage or higher throughput in Turbine’s retransmit stage). 
* **Metric Visibility:** Verify that you can observe the retransmit-stage metrics, which show time spent sending shreds, to gauge the impact of XDP on network transmission. -To enable XDP in Agave, add the following command-line flags to your validator startup command (using Agave v3.0.9+): +XDP is enabled by default on Linux in Agave. The default XDP configuration uses copy mode on an auto-selected CPU separate from the PoH pinned CPU core. To use different CPU cores for XDP, pass: ```bash ---experimental-retransmit-xdp-cpu-cores 1 ---experimental-retransmit-xdp-zero-copy # Do NOT pass this flag when using the bnxt_en driver. ---experimental-poh-pinned-cpu-core 10 +--xdp-cpu-cores 1 ``` -Note that --experimental-retransmit-xdp-zero-copy will avoid using socket buffers for data, but this is only possible when talking directly to the Network Interface Card (NIC). As a result, zero copy cannot be used with the bonded interface itself. When using a bonded network interface, specify the underlying member interface to which the XDP program should be attached: +Zero copy avoids using socket buffers for data, but this is only possible when talking directly to the Network Interface Card (NIC). Zero copy requires an explicit XDP CPU assignment, so to opt in, pass `--xdp-zero-copy` together with `--xdp-cpu-cores`: ```bash ---experimental-retransmit-xdp-interface +--xdp-cpu-cores 1 --xdp-zero-copy ``` - Also note that XDP and PoH *must* be assigned to separate (physical) cores. The ---experimental-poh-pinned-cpu-core N flag can be used to move the PoH thread. +When using zero copy with a bonded network interface, you must pass `--xdp-interface` with the underlying member interface to which the XDP program should be attached: -Next, your validator binary will need to have access to a few higher level permissions. The validator process requires the CAP_NET_RAW, CAP_NET_ADMIN, CAP_BPF, and CAP_PERFMON capabilities. 
These can be configured in the systemd service file by setting CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN CAP_BPF CAP_PERFMON under the [Service] section or directly on the binary with the command: +```bash +--xdp-cpu-cores 1 --xdp-zero-copy --xdp-interface <INTERFACE> +``` + +Also note that XDP and PoH *must* be assigned to separate (physical) cores. The +`--poh-pinned-cpu-core N` flag can be used to move the PoH thread. + +Next, your validator binary will need to have access to a few higher level permissions. With default copy-mode XDP, the validator process requires the CAP_NET_RAW and CAP_NET_ADMIN capabilities. Zero copy additionally requires CAP_BPF and CAP_PERFMON. These capabilities can be configured in the systemd service file by setting CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN (append CAP_BPF CAP_PERFMON when using zero copy) under the [Service] section or directly on the binary with the command: ```bash -sudo setcap cap_net_raw,cap_net_admin,cap_bpf,cap_perfmon=p +sudo setcap cap_net_raw,cap_net_admin=p <path/to/agave-validator> # for default XDP w/o zero copy +# OR +sudo setcap cap_net_raw,cap_net_admin,cap_bpf,cap_perfmon=p <path/to/agave-validator> # for XDP w/ zero copy #this command must be run each time the binary is replaced ``` diff --git a/multinode-demo/bootstrap-validator.sh b/multinode-demo/bootstrap-validator.sh index 6b231383e1d..fa75b51a88d 100755 --- a/multinode-demo/bootstrap-validator.sh +++ b/multinode-demo/bootstrap-validator.sh @@ -144,6 +144,7 @@ args+=( --no-wait-for-vote-to-start-leader --full-rpc-api --allow-private-addr + --no-xdp ) default_arg --gossip-port 8001 default_arg --log - diff --git a/multinode-demo/validator.sh b/multinode-demo/validator.sh index 0d76fab0284..07bd51a86fb 100755 --- a/multinode-demo/validator.sh +++ b/multinode-demo/validator.sh @@ -10,6 +10,7 @@ args=( --max-genesis-archive-unpacked-size 1073741824 --no-poh-speed-test --no-os-network-limits-test + --no-xdp ) airdrops_enabled=1 node_sol=500 # 500 SOL: number of SOL to airdrop the node for transaction fees and vote account rent exemption (ignored if airdrops_enabled=0) 
diff --git a/poh/src/poh_service.rs b/poh/src/poh_service.rs index c8dbc60ef95..06d12835f22 100644 --- a/poh/src/poh_service.rs +++ b/poh/src/poh_service.rs @@ -40,7 +40,7 @@ pub const DEFAULT_HASHES_PER_BATCH: u64 = TARGET_HASH_BATCH_TIME_US * DEFAULT_HASHES_PER_SECOND / 1_000_000; #[cfg(target_os = "linux")] -pub const DEFAULT_PINNED_CPU_CORE: Option = Some(0); +pub const DEFAULT_PINNED_CPU_CORE: Option = Some(10); #[cfg(not(target_os = "linux"))] pub const DEFAULT_PINNED_CPU_CORE: Option = None; diff --git a/scripts/run.sh b/scripts/run.sh index 18f13e339b1..5452a8d4dfc 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -121,6 +121,7 @@ args=( --require-tower --no-wait-for-vote-to-start-leader --no-os-network-limits-test + --no-xdp ) # shellcheck disable=SC2086 agave-validator "${args[@]}" $SOLANA_RUN_SH_VALIDATOR_ARGS & diff --git a/validator/src/cli.rs b/validator/src/cli.rs index 7c5484ac007..86a5823e2fa 100644 --- a/validator/src/cli.rs +++ b/validator/src/cli.rs @@ -158,18 +158,31 @@ fn deprecated_arguments() -> Vec { .conflicts_with("accounts_index_limit"), replaced_by: "accounts-index-limit", ); + add_arg!( + // deprecated in v4.2.0 + Arg::with_name("experimental_poh_pinned_cpu_core") + .long("experimental-poh-pinned-cpu-core") + .takes_value(true) + .value_name("CPU_ID") + .conflicts_with("poh_pinned_cpu_core") + .validator(is_parsable::) + .help("Specify which CPU core PoH is pinned to. Use --poh-pinned-cpu-core instead"), + replaced_by: "poh-pinned-cpu-core", + ); add_arg!( // deprecated in v4.1.0 Arg::with_name("experimental_retransmit_xdp_cpu_cores") .long("experimental-retransmit-xdp-cpu-cores") .takes_value(true) .value_name("CPU_LIST") + .conflicts_with("no_xdp") .conflicts_with("xdp_cpu_cores") .validator(|value| { validate_cpu_ranges(value, "--experimental-retransmit-xdp-cpu-cores") }) .help( - "Enable XDP retransmit on the specified CPU cores. 
Use --xdp-cpu-cores instead", + "Use the specified CPU cores for XDP; must not include the PoH pinned CPU core. \ + Use --xdp-cpu-cores instead", ), replaced_by: "xdp-cpu-cores", ); @@ -179,9 +192,10 @@ fn deprecated_arguments() -> Vec { .long("experimental-retransmit-xdp-interface") .takes_value(true) .value_name("INTERFACE") + .conflicts_with("no_xdp") .conflicts_with("xdp_interface") .requires("experimental_retransmit_xdp_cpu_cores") - .help("Network interface to use for XDP retransmit. Use --xdp-interface instead"), + .help("Network interface to use for XDP. Use --xdp-interface instead"), replaced_by: "xdp-interface", ); add_arg!( @@ -189,6 +203,7 @@ fn deprecated_arguments() -> Vec { Arg::with_name("experimental_retransmit_xdp_zero_copy") .long("experimental-retransmit-xdp-zero-copy") .takes_value(false) + .conflicts_with("no_xdp") .conflicts_with("xdp_zero_copy") .requires("experimental_retransmit_xdp_cpu_cores") .help("Enable XDP zero copy. Use --xdp-zero-copy instead"), diff --git a/validator/src/commands/run/args.rs b/validator/src/commands/run/args.rs index 6ac31cda870..e68bd764440 100644 --- a/validator/src/commands/run/args.rs +++ b/validator/src/commands/run/args.rs @@ -30,7 +30,7 @@ use { solana_send_transaction_service::send_transaction_service::Config as SendTransactionServiceConfig, solana_signer::Signer, solana_unified_scheduler_pool::DefaultSchedulerPool, - std::{collections::HashSet, net::SocketAddr, path::PathBuf, str::FromStr}, + std::{collections::HashSet, net::SocketAddr, path::PathBuf}, }; const EXCLUDE_KEY: &str = "account-index-exclude-key"; @@ -864,12 +864,11 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, ) .arg( Arg::with_name("poh_pinned_cpu_core") - .hidden(hidden_unless_forced()) - .long("experimental-poh-pinned-cpu-core") + .long("poh-pinned-cpu-core") .takes_value(true) .value_name("CPU_ID") - .validator(|s| usize::from_str(&s).map(|_| ()).map_err(|e| e.to_string())) - .help("Specify which CPU 
core PoH is pinned to"), + .validator(is_parsable::) + .help("Specify which CPU core PoH is pinned to. Defaults to CPU 10 on Linux"), ) .arg( Arg::with_name("poh_hashes_per_batch") @@ -1208,11 +1207,24 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, .validator(|s| is_within_range(s, 1..)) .help(DefaultSchedulerPool::cli_message()), ) + .arg( + Arg::with_name("no_xdp") + .long("no-xdp") + .takes_value(false) + .conflicts_with("experimental_retransmit_xdp_cpu_cores") + .conflicts_with("experimental_retransmit_xdp_interface") + .conflicts_with("experimental_retransmit_xdp_zero_copy") + .conflicts_with("xdp_cpu_cores") + .conflicts_with("xdp_interface") + .conflicts_with("xdp_zero_copy") + .help("Do not use XDP transmit"), + ) .arg( Arg::with_name("xdp_interface") .long("xdp-interface") .takes_value(true) .value_name("INTERFACE") + .conflicts_with("no_xdp") .requires("xdp_cpu_cores") .help("Network interface to use for XDP"), ) @@ -1221,13 +1233,15 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, .long("xdp-cpu-cores") .takes_value(true) .value_name("CPU_LIST") + .conflicts_with("no_xdp") .validator(|value| validate_cpu_ranges(value, "--xdp-cpu-cores")) - .help("Use the specified CPU cores for XDP"), + .help("Use the specified CPU cores for XDP; must not include the PoH pinned CPU core"), ) .arg( Arg::with_name("xdp_zero_copy") .long("xdp-zero-copy") .takes_value(false) + .conflicts_with("no_xdp") .requires("xdp_cpu_cores") .help("Enable XDP zero copy. 
Requires hardware support"), ) diff --git a/validator/src/commands/run/execute.rs b/validator/src/commands/run/execute.rs index 37b441b9836..76c8ba95dde 100644 --- a/validator/src/commands/run/execute.rs +++ b/validator/src/commands/run/execute.rs @@ -1,3 +1,5 @@ +#[cfg(target_os = "linux")] +use agave_cpu_utils::cpu_affinity; use { crate::{ admin_rpc_service::{self, StakedNodesOverrides, load_staked_nodes_overrides}, @@ -84,12 +86,71 @@ use { #[cfg(target_os = "linux")] use {agave_xdp::transmitter::XdpConfig, solana_clap_utils::input_parsers::parse_cpu_ranges}; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Operation { Initialize, Run, } +#[cfg(target_os = "linux")] +fn parse_xdp_transmit_config( + matches: &ArgMatches, + bind_addresses: &BindIpAddrs, + operation: Operation, +) -> Result, String> { + if matches.is_present("no_xdp") || operation == Operation::Initialize { + return Ok(None); + } + + if bind_addresses.len() > 1 { + return Err(String::from("XDP cannot be used in a multihoming context")); + } + + let xdp_interface = matches + .value_of("xdp_interface") + .or_else(|| matches.value_of("experimental_retransmit_xdp_interface")); + let xdp_zero_copy = matches.is_present("xdp_zero_copy") + || matches.is_present("experimental_retransmit_xdp_zero_copy"); + let xdp_cpu_ranges = matches + .value_of("xdp_cpu_cores") + .or_else(|| matches.value_of("experimental_retransmit_xdp_cpu_cores")); + let poh_pinned_cpu_core = value_of(matches, "poh_pinned_cpu_core") + .or_else(|| value_of(matches, "experimental_poh_pinned_cpu_core")) + .or(poh_service::DEFAULT_PINNED_CPU_CORE); + let is_poh_pinned_cpu_core = |cpu| Some(cpu) == poh_pinned_cpu_core; + let xdp_cpus = if let Some(cpu_ranges) = xdp_cpu_ranges { + let xdp_cpus = parse_cpu_ranges(cpu_ranges).map_err(|err| err.to_string())?; + if let Some(poh_cpu) = poh_pinned_cpu_core.filter(|poh_cpu| xdp_cpus.contains(poh_cpu)) { + return Err(format!( + "XDP CPU cores must not include the 
PoH pinned CPU core ({poh_cpu})" + )); + } + xdp_cpus + } else { + let cpu = cpu_affinity(None) + .map_err(|err| { + format!( + "failed to query CPU affinity for XDP CPU selection: {err}. Provide \ + --xdp-cpu-cores explicitly" + ) + })? + .into_iter() + .rev() + .map(|cpu| *cpu) + .find(|cpu| !is_poh_pinned_cpu_core(*cpu)) + .ok_or_else(|| { + format!( + "XDP requires an allowed CPU core separate from PoH (core \ + {poh_pinned_cpu_core:?})" + ) + })?; + vec![cpu] + }; + + info!("XDP enabled on CPU cores: {xdp_cpus:?}"); + Ok(Some(XdpConfig::new(xdp_interface, xdp_cpus, xdp_zero_copy))) +} + pub fn execute( matches: &ArgMatches, solana_version: &str, @@ -164,29 +225,7 @@ pub fn execute( } } #[cfg(target_os = "linux")] - let xdp_transmit_config = if let Some(xdp_cpu_cores) = matches - .value_of("xdp_cpu_cores") - .or_else(|| matches.value_of("experimental_retransmit_xdp_cpu_cores")) - { - let xdp_interface = matches - .value_of("xdp_interface") - .or_else(|| matches.value_of("experimental_retransmit_xdp_interface")); - let xdp_zero_copy = matches.is_present("xdp_zero_copy") - || matches.is_present("experimental_retransmit_xdp_zero_copy"); - let config = XdpConfig::new( - xdp_interface, - parse_cpu_ranges(xdp_cpu_cores).unwrap(), - xdp_zero_copy, - ); - if bind_addresses.len() > 1 { - Err(String::from( - "--xdp-cpu-cores cannot be used in a multihoming context", - ))?; - } - Some(config) - } else { - None - }; + let xdp_transmit_config = parse_xdp_transmit_config(matches, &bind_addresses, operation)?; let dynamic_port_range = solana_net_utils::parse_port_range(matches.value_of("dynamic_port_range").unwrap()) @@ -426,12 +465,18 @@ pub fn execute( let (xdp_transmit_setup, xdp_network_config_report) = (None, None); #[cfg(target_os = "linux")] - let poh_pinned_cpu_core = - value_of(matches, "poh_pinned_cpu_core").or(poh_service::DEFAULT_PINNED_CPU_CORE); + let poh_pinned_cpu_core = value_of(matches, "poh_pinned_cpu_core") + .or_else(|| value_of(matches, 
"experimental_poh_pinned_cpu_core")) + .or(poh_service::DEFAULT_PINNED_CPU_CORE); #[cfg(not(target_os = "linux"))] let poh_pinned_cpu_core = None; + #[cfg(target_os = "linux")] + if let Some(poh_pinned_cpu_core) = poh_pinned_cpu_core { + info!("PoH pinned CPU core: {poh_pinned_cpu_core}"); + } + solana_core::validator::report_target_features(); let authorized_voter_keypairs = keypairs_of(matches, "authorized_voter_keypairs") @@ -1378,3 +1423,155 @@ fn new_snapshot_config( Ok(snapshot_config) } + +#[cfg(all(test, target_os = "linux"))] +mod tests { + use { + super::*, + std::net::{IpAddr, Ipv4Addr}, + }; + + fn xdp_config_for_args( + args: &[&str], + bind_addresses: &BindIpAddrs, + ) -> Result, String> { + xdp_config_for_args_and_operation(args, bind_addresses, Operation::Run) + } + + fn xdp_config_for_args_and_operation( + args: &[&str], + bind_addresses: &BindIpAddrs, + operation: Operation, + ) -> Result, String> { + let default_args = cli::DefaultArgs::default(); + let matches = + cli::app("test", &default_args).get_matches_from([&["agave-validator"], args].concat()); + parse_xdp_transmit_config(&matches, bind_addresses, operation) + } + + #[test] + fn xdp_config_accepts_default_and_explicit_configs() { + let bind_addresses = BindIpAddrs::default(); + + let allowed_cpus = cpu_affinity(None).unwrap(); + match xdp_config_for_args(&[], &bind_addresses) { + Ok(Some(config)) => { + assert_eq!(config.interface, None); + assert_eq!(config.cpus.len(), 1); + assert!(!config.zero_copy); + if let Some(poh_pinned_cpu_core) = poh_service::DEFAULT_PINNED_CPU_CORE { + assert!(!config.cpus.contains(&poh_pinned_cpu_core)); + } + } + Ok(None) => panic!("XDP should default on while running"), + Err(err) => { + let only_poh_core_allowed = allowed_cpus + .iter() + .all(|cpu| Some(**cpu) == poh_service::DEFAULT_PINNED_CPU_CORE); + assert!(only_poh_core_allowed); + assert!(err.contains("separate from PoH")); + } + } + + if allowed_cpus.len() > 1 { + let poh_cpu = 
**allowed_cpus.last().unwrap(); + let poh_cpu_arg = poh_cpu.to_string(); + let config = + xdp_config_for_args(&["--poh-pinned-cpu-core", &poh_cpu_arg], &bind_addresses) + .unwrap() + .unwrap(); + assert_eq!(config.cpus.len(), 1); + assert!(!config.cpus.contains(&poh_cpu)); + } + + let config = xdp_config_for_args( + &["--xdp-interface", "eth0", "--xdp-cpu-cores", "2-3"], + &bind_addresses, + ) + .unwrap() + .unwrap(); + assert_eq!(config.interface.as_deref(), Some("eth0")); + assert_eq!(config.cpus, vec![2, 3]); + assert!(!config.zero_copy); + + let config = xdp_config_for_args( + &["--xdp-zero-copy", "--xdp-cpu-cores", "2"], + &bind_addresses, + ) + .unwrap() + .unwrap(); + assert_eq!(config.interface, None); + assert_eq!(config.cpus, vec![2]); + assert!(config.zero_copy); + + let config = xdp_config_for_args( + &[ + "--experimental-retransmit-xdp-zero-copy", + "--experimental-retransmit-xdp-interface", + "eth0", + "--experimental-retransmit-xdp-cpu-cores", + "2", + ], + &bind_addresses, + ) + .unwrap() + .unwrap(); + assert_eq!(config.interface.as_deref(), Some("eth0")); + assert_eq!(config.cpus, vec![2]); + assert!(config.zero_copy); + } + + #[test] + fn xdp_config_rejects_or_skips_invalid_modes() { + let bind_addresses = BindIpAddrs::default(); + assert!( + xdp_config_for_args(&["--no-xdp"], &bind_addresses) + .unwrap() + .is_none() + ); + assert!( + xdp_config_for_args_and_operation( + &["--xdp-zero-copy", "--xdp-cpu-cores", "2"], + &bind_addresses, + Operation::Initialize, + ) + .unwrap() + .is_none() + ); + + let err = xdp_config_for_args( + &["--poh-pinned-cpu-core", "1", "--xdp-cpu-cores", "1"], + &bind_addresses, + ) + .unwrap_err(); + assert!(err.contains("PoH pinned CPU core")); + assert!(!err.contains("--no-xdp")); + + let multihomed = BindIpAddrs::new(vec![ + IpAddr::V4(Ipv4Addr::new(8, 8, 8, 8)), + IpAddr::V4(Ipv4Addr::new(1, 1, 1, 1)), + ]) + .unwrap(); + let err = xdp_config_for_args(&[], &multihomed).unwrap_err(); + 
assert!(err.contains("multihoming")); + assert!(!err.contains("--no-xdp")); + assert!( + xdp_config_for_args(&["--no-xdp"], &multihomed) + .unwrap() + .is_none() + ); + + let default_args = cli::DefaultArgs::default(); + for args in [ + vec!["agave-validator", "--xdp-zero-copy"], + vec!["agave-validator", "--xdp-interface", "eth0"], + vec!["agave-validator", "--no-xdp", "--xdp-cpu-cores", "2"], + ] { + assert!( + cli::app("test", &default_args) + .get_matches_from_safe(args) + .is_err() + ); + } + } +}