From f4e230ca311582da1d026fea88fcd6d571841648 Mon Sep 17 00:00:00 2001 From: greg Date: Thu, 11 Jun 2026 22:09:26 +0000 Subject: [PATCH 1/9] enable xdp by default --- CHANGELOG.md | 6 + docs/src/operations/running-with-af-xdp.md | 29 ++-- validator/src/cli.rs | 26 +++- validator/src/commands/run/args.rs | 35 +++-- validator/src/commands/run/execute.rs | 171 +++++++++++++++++---- 5 files changed, 214 insertions(+), 53 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8c9ec0b80a..681bf921c90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,10 +23,16 @@ Release channels have their own copy of this changelog: `getLatestBlockhash` response together with its context (notably `context.slot`). ### Validator #### Breaking +* XDP transmit is now enabled by default on Linux with zero copy on CPU core 1. Use `--disable-xdp` to opt + out, `--xdp-cpu-cores` to override the XDP CPU assignment, and `--disable-xdp-zero-copy` to keep + XDP enabled without zero copy. Default validator startup now requires the XDP capabilities unless + XDP is disabled. #### Deprecations * `--accounts-db-access-storages-method` is now deprecated and a no-op (the `mmap` value was deprecated in v4.0.0; mmap mode has now been removed entirely). The flag is still accepted for backward compatibility, but account storages are always accessed via file I/O. +* `--xdp-zero-copy` is now deprecated and a no-op because XDP zero copy is enabled by default. Use + `--disable-xdp-zero-copy` to opt out of zero copy. #### Changes * Turbine shred ingestion now rejects shreds more than half an epoch in the future (previously up to 2 full epochs ahead was accepted). ### CLI diff --git a/docs/src/operations/running-with-af-xdp.md b/docs/src/operations/running-with-af-xdp.md index 94b1f57b4a2..9267ebe4590 100644 --- a/docs/src/operations/running-with-af-xdp.md +++ b/docs/src/operations/running-with-af-xdp.md @@ -15,24 +15,33 @@ Before rolling out XDP on a production validator, you should test it on your set * **Performance Gain:** Confirm that performance is improved with the new configuration (e.g. lower CPU usage or higher throughput in Turbine’s retransmit stage). * **Metric Visibility:** Verify that you can observe the retransmit-stage metrics, which show time spent sending shreds, to gauge the impact of XDP on network transmission. -To enable XDP in Agave, add the following command-line flags to your validator startup command (using Agave v3.0.9+): +XDP is enabled by default on Linux in Agave. The default XDP configuration uses CPU core 1 and enables zero copy. To use different CPU cores for XDP, pass: ```bash ---experimental-retransmit-xdp-cpu-cores 1 ---experimental-retransmit-xdp-zero-copy # Do NOT pass this flag when using the bnxt_en driver. ---experimental-poh-pinned-cpu-core 10 +--xdp-cpu-cores 2 ``` -Note that --experimental-retransmit-xdp-zero-copy will avoid using socket buffers for data, but this is only possible when talking directly to the Network Interface Card (NIC). As a result, zero copy cannot be used with the bonded interface itself. When using a bonded network interface, specify the underlying member interface to which the XDP program should be attached: +To disable XDP entirely, pass: ```bash ---experimental-retransmit-xdp-interface +--disable-xdp ``` - Also note that XDP and PoH *must* be assigned to separate (physical) cores. The ---experimental-poh-pinned-cpu-core N flag can be used to move the PoH thread. +Zero copy avoids using socket buffers for data, but this is only possible when talking directly to the Network Interface Card (NIC). On NICs or drivers that do not support zero copy, such as the `bnxt_en` driver, keep XDP enabled and pass: -Next, your validator binary will need to have access to a few higher level permissions. The validator process requires the CAP_NET_RAW, CAP_NET_ADMIN, CAP_BPF, and CAP_PERFMON capabilities. These can be configured in the systemd service file by setting CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN CAP_BPF CAP_PERFMON under the [Service] section or directly on the binary with the command: +```bash +--disable-xdp-zero-copy +``` + +Zero copy cannot be used with a bonded interface itself. When using a bonded network interface, specify the underlying member interface to which the XDP program should be attached: + +```bash +--xdp-interface +``` + +Also note that XDP and PoH *must* be assigned to separate (physical) cores. PoH defaults to CPU core 0, and XDP defaults to CPU core 1. The --experimental-poh-pinned-cpu-core N flag can be used to move the PoH thread. + +Next, your validator binary will need to have access to a few higher level permissions. With default zero-copy XDP, the validator process requires the CAP_NET_RAW, CAP_NET_ADMIN, CAP_BPF, and CAP_PERFMON capabilities. Passing --disable-xdp-zero-copy avoids the CAP_BPF and CAP_PERFMON requirements; passing --disable-xdp avoids XDP capability requirements entirely. These capabilities can be configured in the systemd service file by setting CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN CAP_BPF CAP_PERFMON under the [Service] section or directly on the binary with the command: ```bash sudo setcap cap_net_raw,cap_net_admin,cap_bpf,cap_perfmon=p @@ -78,7 +87,7 @@ modinfo bnxt_en | `igb` / Intel I210 | ✅ Works | ✅ Works w/ caveat | caveat: `igb` requires kernel `>= 6.14` for ZC. Field report: I210 on 6.17 enabled ZC but had severe network degradation/high skips, so fall back to non-ZC if unstable. | | `ixgbe` / Intel X540, X550 | ✅ Works | ⚠️ Mixed / unstable | Alessandro guidance for freeze/link-flap cases: start without ZC while `ixgbe` is debugged. Stay tuned! | | `ice` / Intel E800 | ✅ Works | ✅ Works | `ice` supports native XDP and AF_XDP zero-copy. Caveats: XDP is blocked for frame sizes larger than 3KB | -| `bnxt_en` / Broadcom | ✅ Works | ❌ Does not work | `bnxt_en` works with XDP, but do not pass the zero-copy flag. Broadcom non-ZC can still be reasonably fast. But please get a non-broadcom NIC | +| `bnxt_en` / Broadcom | ✅ Works | ❌ Does not work | `bnxt_en` works with XDP, but pass `--disable-xdp-zero-copy`. Broadcom non-ZC can still be reasonably fast. But please get a non-broadcom NIC | | `tg3` / Broadcom | ❌ No native/driver XDP; generic XDP only at best | ❌ Does not work | Broadcom BCM5720 uses the `tg3` driver. Treat as unsupported for Agave/AF_XDP performance work: no native XDP and no AF_XDP zero-copy. | | `r8169` / Realtek | ❌ No native/driver XDP; generic XDP only at best | ❌ Does not work | Realtek NICs using `r8169` should be treated as unsupported for Agave/AF_XDP performance work: no native XDP and no AF_XDP zero-copy.| | `mlx4_en` / Mellanox ConnectX-3 | ❌ Do not use | ❌ Does not work | Driver is no longer supported. Zero-copy does not work. Do not use. | diff --git a/validator/src/cli.rs b/validator/src/cli.rs index 7c5484ac007..9894f637e96 100644 --- a/validator/src/cli.rs +++ b/validator/src/cli.rs @@ -164,13 +164,12 @@ fn deprecated_arguments() -> Vec { .long("experimental-retransmit-xdp-cpu-cores") .takes_value(true) .value_name("CPU_LIST") + .conflicts_with("disable_xdp") .conflicts_with("xdp_cpu_cores") .validator(|value| { validate_cpu_ranges(value, "--experimental-retransmit-xdp-cpu-cores") }) - .help( - "Enable XDP retransmit on the specified CPU cores. Use --xdp-cpu-cores instead", - ), + .help("Use the specified CPU cores for XDP. Use --xdp-cpu-cores instead"), replaced_by: "xdp-cpu-cores", ); add_arg!( @@ -179,9 +178,9 @@ fn deprecated_arguments() -> Vec { .long("experimental-retransmit-xdp-interface") .takes_value(true) .value_name("INTERFACE") + .conflicts_with("disable_xdp") .conflicts_with("xdp_interface") - .requires("experimental_retransmit_xdp_cpu_cores") - .help("Network interface to use for XDP retransmit. Use --xdp-interface instead"), + .help("Network interface to use for XDP. Use --xdp-interface instead"), replaced_by: "xdp-interface", ); add_arg!( @@ -189,10 +188,11 @@ fn deprecated_arguments() -> Vec { Arg::with_name("experimental_retransmit_xdp_zero_copy") .long("experimental-retransmit-xdp-zero-copy") .takes_value(false) + .conflicts_with("disable_xdp") + .conflicts_with("disable_xdp_zero_copy") .conflicts_with("xdp_zero_copy") - .requires("experimental_retransmit_xdp_cpu_cores") - .help("Enable XDP zero copy. Use --xdp-zero-copy instead"), - replaced_by: "xdp-zero-copy", + .help("No-op; XDP zero copy is enabled by default"), + usage_warning: "XDP zero copy is enabled by default. Use --disable-xdp-zero-copy to disable it.", ); add_arg!( // deprecated in v4.0.0 @@ -203,6 +203,16 @@ fn deprecated_arguments() -> Vec { .help("Controls the TPU connection pool size per remote address"), usage_warning:"This parameter is misleading, avoid setting it", ); + add_arg!( + // deprecated in v4.2.0 + Arg::with_name("xdp_zero_copy") + .long("xdp-zero-copy") + .takes_value(false) + .conflicts_with("disable_xdp") + .conflicts_with("disable_xdp_zero_copy") + .help("No-op; XDP zero copy is enabled by default"), + usage_warning: "XDP zero copy is enabled by default. Use --disable-xdp-zero-copy to disable it.", + ); res } diff --git a/validator/src/commands/run/args.rs b/validator/src/commands/run/args.rs index 6ac31cda870..63856cfeafd 100644 --- a/validator/src/commands/run/args.rs +++ b/validator/src/commands/run/args.rs @@ -35,6 +35,7 @@ use { const EXCLUDE_KEY: &str = "account-index-exclude-key"; const INCLUDE_KEY: &str = "account-index-include-key"; +pub const DEFAULT_XDP_CPU_CORE: usize = 1; pub mod account_secondary_indexes; pub mod blockstore_options; @@ -1208,12 +1209,34 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, .validator(|s| is_within_range(s, 1..)) .help(DefaultSchedulerPool::cli_message()), ) + .arg( + Arg::with_name("disable_xdp") + .long("disable-xdp") + .takes_value(false) + .conflicts_with("experimental_retransmit_xdp_cpu_cores") + .conflicts_with("experimental_retransmit_xdp_interface") + .conflicts_with("experimental_retransmit_xdp_zero_copy") + .conflicts_with("xdp_cpu_cores") + .conflicts_with("xdp_interface") + .conflicts_with("xdp_zero_copy") + .conflicts_with("disable_xdp_zero_copy") + .help("Disable XDP transmit, which is enabled by default"), + ) + .arg( + Arg::with_name("disable_xdp_zero_copy") + .long("disable-xdp-zero-copy") + .takes_value(false) + .conflicts_with("experimental_retransmit_xdp_zero_copy") + .conflicts_with("xdp_zero_copy") + .conflicts_with("disable_xdp") + .help("Disable XDP zero copy while leaving XDP transmit enabled"), + ) .arg( Arg::with_name("xdp_interface") .long("xdp-interface") .takes_value(true) .value_name("INTERFACE") - .requires("xdp_cpu_cores") + .conflicts_with("disable_xdp") .help("Network interface to use for XDP"), ) .arg( @@ -1221,15 +1244,9 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, .long("xdp-cpu-cores") .takes_value(true) .value_name("CPU_LIST") + .conflicts_with("disable_xdp") .validator(|value| validate_cpu_ranges(value, "--xdp-cpu-cores")) - .help("Use the specified CPU cores for XDP"), - ) - .arg( - Arg::with_name("xdp_zero_copy") - .long("xdp-zero-copy") - .takes_value(false) - .requires("xdp_cpu_cores") - .help("Enable XDP zero copy. Requires hardware support"), + .help("Use the specified CPU cores for XDP. Defaults to CPU core 1"), ) .args(&pub_sub_config::args(/*test_validator:*/ false)) .args(&json_rpc_config::args()) diff --git a/validator/src/commands/run/execute.rs b/validator/src/commands/run/execute.rs index 37b441b9836..5d2ed63b72c 100644 --- a/validator/src/commands/run/execute.rs +++ b/validator/src/commands/run/execute.rs @@ -12,6 +12,7 @@ use { snapshot_config::{SnapshotConfig, SnapshotUsage}, }, agave_votor::vote_history_storage, + agave_xdp::transmitter::XdpConfig, clap::{ArgMatches, crate_name, value_t, value_t_or_exit, values_t, values_t_or_exit}, crossbeam_channel::unbounded, log::*, @@ -81,8 +82,6 @@ use { sync::{Arc, RwLock, atomic::AtomicBool}, }, }; -#[cfg(target_os = "linux")] -use {agave_xdp::transmitter::XdpConfig, solana_clap_utils::input_parsers::parse_cpu_ranges}; #[derive(Debug, PartialEq, Eq)] pub enum Operation { @@ -90,6 +89,60 @@ pub enum Operation { Run, } +fn parse_xdp_transmit_config( + matches: &ArgMatches, + bind_addresses: &BindIpAddrs, +) -> Result, String> { + if matches.is_present("disable_xdp") { + return Ok(None); + } + + #[cfg(not(target_os = "linux"))] + { + let _ = bind_addresses; + let xdp_config_requested = matches.value_of("xdp_cpu_cores").is_some() + || matches + .value_of("experimental_retransmit_xdp_cpu_cores") + .is_some() + || matches.value_of("xdp_interface").is_some() + || matches + .value_of("experimental_retransmit_xdp_interface") + .is_some() + || matches.is_present("disable_xdp_zero_copy") + || matches.is_present("xdp_zero_copy") + || matches.is_present("experimental_retransmit_xdp_zero_copy"); + if xdp_config_requested { + return Err(String::from("XDP is only supported on Linux")); + } + Ok(None) + } + + #[cfg(target_os = "linux")] + { + if bind_addresses.len() > 1 { + return Err(String::from( + "XDP cannot be used in a multihoming context. Use --disable-xdp to disable XDP", + )); + } + + let xdp_interface = matches + .value_of("xdp_interface") + .or_else(|| matches.value_of("experimental_retransmit_xdp_interface")); + let xdp_cpus = matches + .value_of("xdp_cpu_cores") + .or_else(|| matches.value_of("experimental_retransmit_xdp_cpu_cores")) + .map(|cpu_ranges| { + solana_clap_utils::input_parsers::parse_cpu_ranges(cpu_ranges) + .map_err(|err| err.to_string()) + }) + .transpose()? + .unwrap_or_else(|| vec![crate::commands::run::args::DEFAULT_XDP_CPU_CORE]); + let xdp_zero_copy = !matches.is_present("disable_xdp_zero_copy"); + + Ok(Some(XdpConfig::new(xdp_interface, xdp_cpus, xdp_zero_copy))) + } +} + pub fn execute( matches: &ArgMatches, solana_version: &str, @@ -163,30 +216,8 @@ pub fn execute( Err(format!("invalid entrypoint address: {addr}"))?; } } - #[cfg(target_os = "linux")] - let xdp_transmit_config = if let Some(xdp_cpu_cores) = matches - .value_of("xdp_cpu_cores") - .or_else(|| matches.value_of("experimental_retransmit_xdp_cpu_cores")) - { - let xdp_interface = matches - .value_of("xdp_interface") - .or_else(|| matches.value_of("experimental_retransmit_xdp_interface")); - let xdp_zero_copy = matches.is_present("xdp_zero_copy") - || matches.is_present("experimental_retransmit_xdp_zero_copy"); - let config = XdpConfig::new( - xdp_interface, - parse_cpu_ranges(xdp_cpu_cores).unwrap(), - xdp_zero_copy, - ); - if bind_addresses.len() > 1 { - Err(String::from( - "--xdp-cpu-cores cannot be used in a multihoming context", - ))?; - } - Some(config) - } else { - None - }; + + let xdp_transmit_config = parse_xdp_transmit_config(matches, &bind_addresses)?; let dynamic_port_range = solana_net_utils::parse_port_range(matches.value_of("dynamic_port_range").unwrap()) @@ -1378,3 +1409,91 @@ fn new_snapshot_config( Ok(snapshot_config) } + +#[cfg(all(test, target_os = "linux"))] +mod tests { + use { + super::*, + std::net::{IpAddr, Ipv4Addr}, + }; + + fn xdp_config_for_args( + args: &[&str], + bind_addresses: &BindIpAddrs, + ) -> Result, String> { + let default_args = cli::DefaultArgs::default(); + let matches = + cli::app("test", &default_args).get_matches_from([&["agave-validator"], args].concat()); + parse_xdp_transmit_config(&matches, bind_addresses) + } + + #[test] + fn default_xdp_config_uses_zero_copy_and_default_cpu() { + let bind_addresses = BindIpAddrs::default(); + let config = xdp_config_for_args(&[], &bind_addresses).unwrap().unwrap(); + + assert_eq!(config.interface, None); + assert_eq!( + config.cpus, + vec![crate::commands::run::args::DEFAULT_XDP_CPU_CORE] + ); + assert!(config.zero_copy); + } + + #[test] + fn disable_xdp_returns_no_config() { + let bind_addresses = BindIpAddrs::default(); + assert!(xdp_config_for_args(&["--disable-xdp"], &bind_addresses) + .unwrap() + .is_none()); + } + + #[test] + fn xdp_cpu_interface_and_zero_copy_are_configurable() { + let bind_addresses = BindIpAddrs::default(); + let config = xdp_config_for_args( + &[ + "--xdp-interface", + "eth0", + "--xdp-cpu-cores", + "2-3", + "--disable-xdp-zero-copy", + ], + &bind_addresses, + ) + .unwrap() + .unwrap(); + + assert_eq!(config.interface.as_deref(), Some("eth0")); + assert_eq!(config.cpus, vec![2, 3]); + assert!(!config.zero_copy); + } + + #[test] + fn xdp_requires_opt_out_in_multihoming_context() { + let bind_addresses = BindIpAddrs::new(vec![ + IpAddr::V4(Ipv4Addr::new(8, 8, 8, 8)), + IpAddr::V4(Ipv4Addr::new(1, 1, 1, 1)), + ]) + .unwrap(); + + let err = xdp_config_for_args(&[], &bind_addresses).unwrap_err(); + assert!(err.contains("--disable-xdp")); + assert!(xdp_config_for_args(&["--disable-xdp"], &bind_addresses) + .unwrap() + .is_none()); + } + + #[test] + fn disable_xdp_conflicts_with_xdp_overrides() { + let default_args = cli::DefaultArgs::default(); + let matches = cli::app("test", &default_args).get_matches_from_safe(vec![ + "agave-validator", + "--disable-xdp", + "--xdp-cpu-cores", + "2", + ]); + + assert!(matches.is_err()); + } +} From 5d86654794a3cc2a303ca6918a7f9911d9d78fed Mon Sep 17 00:00:00 2001 From: greg Date: Thu, 11 Jun 2026 23:40:20 +0000 Subject: [PATCH 2/9] deprecate poh experimental arg. and set default poh core to 10 --- CHANGELOG.md | 2 + docs/src/operations/running-with-af-xdp.md | 2 +- poh/src/poh_service.rs | 2 +- validator/src/cli.rs | 11 ++++ validator/src/commands/run/args.rs | 5 +- validator/src/commands/run/execute.rs | 59 +++++++++++++++++++++- 6 files changed, 74 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 681bf921c90..ba917331a26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,10 +27,12 @@ Release channels have their own copy of this changelog: out, `--xdp-cpu-cores` to override the XDP CPU assignment, and `--disable-xdp-zero-copy` to keep XDP enabled without zero copy. Default validator startup now requires the XDP capabilities unless XDP is disabled. +* The default PoH pinned CPU core is now CPU core 10. Use `--poh-pinned-cpu-core` to override it. #### Deprecations * `--accounts-db-access-storages-method` is now deprecated and a no-op (the `mmap` value was deprecated in v4.0.0; mmap mode has now been removed entirely). The flag is still accepted for backward compatibility, but account storages are always accessed via file I/O. +* `--experimental-poh-pinned-cpu-core` is now deprecated. Use `--poh-pinned-cpu-core` instead. * `--xdp-zero-copy` is now deprecated and a no-op because XDP zero copy is enabled by default. Use `--disable-xdp-zero-copy` to opt out of zero copy. #### Changes diff --git a/docs/src/operations/running-with-af-xdp.md b/docs/src/operations/running-with-af-xdp.md index 9267ebe4590..f0e04474960 100644 --- a/docs/src/operations/running-with-af-xdp.md +++ b/docs/src/operations/running-with-af-xdp.md @@ -39,7 +39,7 @@ Zero copy cannot be used with a bonded interface itself. When using a bonded net --xdp-interface ``` -Also note that XDP and PoH *must* be assigned to separate (physical) cores. PoH defaults to CPU core 0, and XDP defaults to CPU core 1. The --experimental-poh-pinned-cpu-core N flag can be used to move the PoH thread. +Also note that XDP and PoH *must* be assigned to separate (physical) cores. PoH defaults to CPU core 10, and XDP defaults to CPU core 1. The --poh-pinned-cpu-core N flag can be used to move the PoH thread. Next, your validator binary will need to have access to a few higher level permissions. With default zero-copy XDP, the validator process requires the CAP_NET_RAW, CAP_NET_ADMIN, CAP_BPF, and CAP_PERFMON capabilities. Passing --disable-xdp-zero-copy avoids the CAP_BPF and CAP_PERFMON requirements; passing --disable-xdp avoids XDP capability requirements entirely. These capabilities can be configured in the systemd service file by setting CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN CAP_BPF CAP_PERFMON under the [Service] section or directly on the binary with the command: diff --git a/poh/src/poh_service.rs b/poh/src/poh_service.rs index c8dbc60ef95..06d12835f22 100644 --- a/poh/src/poh_service.rs +++ b/poh/src/poh_service.rs @@ -40,7 +40,7 @@ pub const DEFAULT_HASHES_PER_BATCH: u64 = TARGET_HASH_BATCH_TIME_US * DEFAULT_HASHES_PER_SECOND / 1_000_000; #[cfg(target_os = "linux")] -pub const DEFAULT_PINNED_CPU_CORE: Option = Some(0); +pub const DEFAULT_PINNED_CPU_CORE: Option = Some(10); #[cfg(not(target_os = "linux"))] pub const DEFAULT_PINNED_CPU_CORE: Option = None; diff --git a/validator/src/cli.rs b/validator/src/cli.rs index 9894f637e96..9e5cba75058 100644 --- a/validator/src/cli.rs +++ b/validator/src/cli.rs @@ -158,6 +158,17 @@ fn deprecated_arguments() -> Vec { .conflicts_with("accounts_index_limit"), replaced_by: "accounts-index-limit", ); + add_arg!( + // deprecated in v4.2.0 + Arg::with_name("experimental_poh_pinned_cpu_core") + .long("experimental-poh-pinned-cpu-core") + .takes_value(true) + .value_name("CPU_CORE_INDEX") + .conflicts_with("poh_pinned_cpu_core") + .validator(|s| usize::from_str(&s).map(|_| ()).map_err(|e| e.to_string())) + .help("Specify which CPU core PoH is pinned to. Use --poh-pinned-cpu-core instead"), + replaced_by: "poh-pinned-cpu-core", + ); add_arg!( // deprecated in v4.1.0 Arg::with_name("experimental_retransmit_xdp_cpu_cores") diff --git a/validator/src/commands/run/args.rs b/validator/src/commands/run/args.rs index 63856cfeafd..a64e4f7b1ed 100644 --- a/validator/src/commands/run/args.rs +++ b/validator/src/commands/run/args.rs @@ -865,10 +865,9 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, ) .arg( Arg::with_name("poh_pinned_cpu_core") - .hidden(hidden_unless_forced()) - .long("experimental-poh-pinned-cpu-core") + .long("poh-pinned-cpu-core") .takes_value(true) - .value_name("CPU_ID") + .value_name("CPU_CORE_INDEX") .validator(|s| usize::from_str(&s).map(|_| ()).map_err(|e| e.to_string())) .help("Specify which CPU core PoH is pinned to"), ) diff --git a/validator/src/commands/run/execute.rs b/validator/src/commands/run/execute.rs index 5d2ed63b72c..2c954bf8204 100644 --- a/validator/src/commands/run/execute.rs +++ b/validator/src/commands/run/execute.rs @@ -89,6 +89,13 @@ pub enum Operation { Run, } +#[cfg(target_os = "linux")] +fn parse_poh_pinned_cpu_core(matches: &ArgMatches) -> Option { + value_of(matches, "poh_pinned_cpu_core") + .or_else(|| value_of(matches, "experimental_poh_pinned_cpu_core")) + .or(poh_service::DEFAULT_PINNED_CPU_CORE) +} + fn parse_xdp_transmit_config( matches: &ArgMatches, bind_addresses: &BindIpAddrs, @@ -457,8 +464,7 @@ pub fn execute( let (xdp_transmit_setup, xdp_network_config_report) = (None, None); #[cfg(target_os = "linux")] - let poh_pinned_cpu_core = - value_of(matches, "poh_pinned_cpu_core").or(poh_service::DEFAULT_PINNED_CPU_CORE); + let poh_pinned_cpu_core = parse_poh_pinned_cpu_core(matches); #[cfg(not(target_os = "linux"))] let poh_pinned_cpu_core = None; @@ -1427,6 +1433,55 @@ mod tests { parse_xdp_transmit_config(&matches, bind_addresses) } + #[test] + fn poh_pinned_cpu_core_defaults_to_configured_default() { + let default_args = cli::DefaultArgs::default(); + let matches = cli::app("test", &default_args).get_matches_from(vec!["agave-validator"]); + + assert_eq!( + parse_poh_pinned_cpu_core(&matches), + poh_service::DEFAULT_PINNED_CPU_CORE + ); + } + + #[test] + fn poh_pinned_cpu_core_uses_stable_arg() { + let default_args = cli::DefaultArgs::default(); + let matches = cli::app("test", &default_args).get_matches_from(vec![ + "agave-validator", + "--poh-pinned-cpu-core", + "0", + ]); + + assert_eq!(parse_poh_pinned_cpu_core(&matches), Some(0)); + } + + #[test] + fn poh_pinned_cpu_core_accepts_deprecated_experimental_arg() { + let default_args = cli::DefaultArgs::default(); + let matches = cli::app("test", &default_args).get_matches_from(vec![ + "agave-validator", + "--experimental-poh-pinned-cpu-core", + "0", + ]); + + assert_eq!(parse_poh_pinned_cpu_core(&matches), Some(0)); + } + + #[test] + fn poh_pinned_cpu_core_args_conflict() { + let default_args = cli::DefaultArgs::default(); + let matches = cli::app("test", &default_args).get_matches_from_safe(vec![ + "agave-validator", + "--poh-pinned-cpu-core", + "0", + "--experimental-poh-pinned-cpu-core", + "0", + ]); + + assert!(matches.is_err()); + } + #[test] fn default_xdp_config_uses_zero_copy_and_default_cpu() { let bind_addresses = BindIpAddrs::default(); From 6d000a9449577bd5907625c59a375d4fd78a86ef Mon Sep 17 00:00:00 2001 From: greg Date: Fri, 12 Jun 2026 00:24:58 +0000 Subject: [PATCH 3/9] enforce xdp-interface on startup w/ zero copy --- CHANGELOG.md | 8 +-- docs/src/operations/running-with-af-xdp.md | 10 ++-- validator/src/commands/run/args.rs | 2 +- validator/src/commands/run/execute.rs | 62 +++++++++++++++++++--- 4 files changed, 65 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba917331a26..17175c4522d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,10 +23,10 @@ Release channels have their own copy of this changelog: `getLatestBlockhash` response together with its context (notably `context.slot`). ### Validator #### Breaking -* XDP transmit is now enabled by default on Linux with zero copy on CPU core 1. Use `--disable-xdp` to opt - out, `--xdp-cpu-cores` to override the XDP CPU assignment, and `--disable-xdp-zero-copy` to keep - XDP enabled without zero copy. Default validator startup now requires the XDP capabilities unless - XDP is disabled. +* XDP transmit is now enabled by default on Linux with zero copy on CPU core 1. Use `--xdp-interface` + to select the XDP interface, `--xdp-cpu-cores` to override the XDP CPU assignment, and + `--disable-xdp-zero-copy` to keep XDP enabled without zero copy. Default validator startup now + requires the XDP capabilities. * The default PoH pinned CPU core is now CPU core 10. Use `--poh-pinned-cpu-core` to override it. #### Deprecations * `--accounts-db-access-storages-method` is now deprecated and a no-op (the `mmap` value was diff --git a/docs/src/operations/running-with-af-xdp.md b/docs/src/operations/running-with-af-xdp.md index f0e04474960..76a7b720788 100644 --- a/docs/src/operations/running-with-af-xdp.md +++ b/docs/src/operations/running-with-af-xdp.md @@ -15,16 +15,16 @@ Before rolling out XDP on a production validator, you should test it on your set * **Performance Gain:** Confirm that performance is improved with the new configuration (e.g. lower CPU usage or higher throughput in Turbine’s retransmit stage). * **Metric Visibility:** Verify that you can observe the retransmit-stage metrics, which show time spent sending shreds, to gauge the impact of XDP on network transmission. -XDP is enabled by default on Linux in Agave. The default XDP configuration uses CPU core 1 and enables zero copy. To use different CPU cores for XDP, pass: +XDP is enabled by default on Linux in Agave. The default XDP configuration uses CPU core 1 and enables zero copy. Zero copy requires an explicit network interface: ```bash ---xdp-cpu-cores 2 +--xdp-interface ``` -To disable XDP entirely, pass: +To use different CPU cores for XDP, pass: ```bash ---disable-xdp +--xdp-cpu-cores 2 ``` Zero copy avoids using socket buffers for data, but this is only possible when talking directly to the Network Interface Card (NIC). On NICs or drivers that do not support zero copy, such as the `bnxt_en` driver, keep XDP enabled and pass: @@ -41,7 +41,7 @@ Zero copy cannot be used with a bonded interface itself. When using a bonded net Also note that XDP and PoH *must* be assigned to separate (physical) cores. PoH defaults to CPU core 10, and XDP defaults to CPU core 1. The --poh-pinned-cpu-core N flag can be used to move the PoH thread. -Next, your validator binary will need to have access to a few higher level permissions. With default zero-copy XDP, the validator process requires the CAP_NET_RAW, CAP_NET_ADMIN, CAP_BPF, and CAP_PERFMON capabilities. Passing --disable-xdp-zero-copy avoids the CAP_BPF and CAP_PERFMON requirements; passing --disable-xdp avoids XDP capability requirements entirely. These capabilities can be configured in the systemd service file by setting CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN CAP_BPF CAP_PERFMON under the [Service] section or directly on the binary with the command: +Next, your validator binary will need to have access to a few higher level permissions. With default zero-copy XDP, the validator process requires the CAP_NET_RAW, CAP_NET_ADMIN, CAP_BPF, and CAP_PERFMON capabilities. Passing --disable-xdp-zero-copy avoids the CAP_BPF and CAP_PERFMON requirements. These capabilities can be configured in the systemd service file by setting CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN CAP_BPF CAP_PERFMON under the [Service] section or directly on the binary with the command: ```bash sudo setcap cap_net_raw,cap_net_admin,cap_bpf,cap_perfmon=p diff --git a/validator/src/commands/run/args.rs b/validator/src/commands/run/args.rs index a64e4f7b1ed..3f2dd519a2a 100644 --- a/validator/src/commands/run/args.rs +++ b/validator/src/commands/run/args.rs @@ -1236,7 +1236,7 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, .takes_value(true) .value_name("INTERFACE") .conflicts_with("disable_xdp") - .help("Network interface to use for XDP"), + .help("Network interface to use for XDP. Required when XDP zero copy is enabled"), ) .arg( Arg::with_name("xdp_cpu_cores") diff --git a/validator/src/commands/run/execute.rs b/validator/src/commands/run/execute.rs index 2c954bf8204..fdc1ccdd021 100644 --- a/validator/src/commands/run/execute.rs +++ b/validator/src/commands/run/execute.rs @@ -128,13 +128,21 @@ fn parse_xdp_transmit_config( { if bind_addresses.len() > 1 { return Err(String::from( - "XDP cannot be used in a multihoming context. Use --disable-xdp to disable XDP", + "XDP cannot be used in a multihoming context", )); } let xdp_interface = matches .value_of("xdp_interface") .or_else(|| matches.value_of("experimental_retransmit_xdp_interface")); + let xdp_zero_copy = !matches.is_present("disable_xdp_zero_copy"); + if xdp_zero_copy && xdp_interface.is_none() { + return Err(String::from( + "XDP zero copy requires an explicit network interface. Use --xdp-interface to \ + select the XDP interface, or --disable-xdp-zero-copy to use XDP without zero \ + copy", + )); + } let xdp_cpus = matches .value_of("xdp_cpu_cores") .or_else(|| matches.value_of("experimental_retransmit_xdp_cpu_cores")) @@ -144,7 +152,6 @@ fn parse_xdp_transmit_config( }) .transpose()? .unwrap_or_else(|| vec![crate::commands::run::args::DEFAULT_XDP_CPU_CORE]); - let xdp_zero_copy = !matches.is_present("disable_xdp_zero_copy"); Ok(Some(XdpConfig::new(xdp_interface, xdp_cpus, xdp_zero_copy))) } @@ -1483,11 +1490,22 @@ mod tests { } #[test] - fn default_xdp_config_uses_zero_copy_and_default_cpu() { + fn default_xdp_config_requires_interface_for_zero_copy() { let bind_addresses = BindIpAddrs::default(); - let config = xdp_config_for_args(&[], &bind_addresses).unwrap().unwrap(); - assert_eq!(config.interface, None); + let err = xdp_config_for_args(&[], &bind_addresses).unwrap_err(); + assert!(err.contains("--xdp-interface")); + assert!(err.contains("--disable-xdp-zero-copy")); + } + + #[test] + fn default_xdp_config_uses_zero_copy_default_cpu_and_configured_interface() { + let bind_addresses = BindIpAddrs::default(); + let config = xdp_config_for_args(&["--xdp-interface", "eth0"], &bind_addresses) + .unwrap() + .unwrap(); + + assert_eq!(config.interface.as_deref(), Some("eth0")); assert_eq!( config.cpus, vec![crate::commands::run::args::DEFAULT_XDP_CPU_CORE] @@ -1495,6 +1513,20 @@ mod tests { assert!(config.zero_copy); } + #[test] + fn xdp_zero_copy_accepts_deprecated_interface_arg() { + let bind_addresses = BindIpAddrs::default(); + let config = xdp_config_for_args( + &["--experimental-retransmit-xdp-interface", "eth0"], + &bind_addresses, + ) + .unwrap() + .unwrap(); + + assert_eq!(config.interface.as_deref(), Some("eth0")); + assert!(config.zero_copy); + } + #[test] fn disable_xdp_returns_no_config() { let bind_addresses = BindIpAddrs::default(); @@ -1525,7 +1557,22 @@ mod tests { } #[test] - fn xdp_requires_opt_out_in_multihoming_context() { + fn xdp_without_zero_copy_can_infer_interface() { + let bind_addresses = BindIpAddrs::default(); + let config = xdp_config_for_args(&["--disable-xdp-zero-copy"], &bind_addresses) + .unwrap() + .unwrap(); + + assert_eq!(config.interface, None); + assert_eq!( + config.cpus, + vec![crate::commands::run::args::DEFAULT_XDP_CPU_CORE] + ); + assert!(!config.zero_copy); + } + + #[test] + fn xdp_requires_single_bind_address() { let bind_addresses = BindIpAddrs::new(vec![ IpAddr::V4(Ipv4Addr::new(8, 8, 8, 8)), IpAddr::V4(Ipv4Addr::new(1, 1, 1, 1)), @@ -1533,7 +1580,8 @@ mod tests { .unwrap(); let err = xdp_config_for_args(&[], &bind_addresses).unwrap_err(); - assert!(err.contains("--disable-xdp")); + assert!(err.contains("multihoming")); + assert!(!err.contains("--disable-xdp")); assert!(xdp_config_for_args(&["--disable-xdp"], &bind_addresses) .unwrap() .is_none()); From 41410028d710ef9292b537762a1847dd95db8445 Mon Sep 17 00:00:00 2001 From: greg Date: Fri, 12 Jun 2026 16:40:50 +0000 Subject: [PATCH 4/9] xdp copy mode default --- CHANGELOG.md | 10 ++--- docs/src/operations/running-with-af-xdp.md | 20 ++++------ validator/src/cli.rs | 14 +------ validator/src/commands/run/args.rs | 8 ++++ validator/src/commands/run/execute.rs | 46 ++++++++++++++++------ 5 files changed, 54 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17175c4522d..63572f52d40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,18 +23,16 @@ Release channels have their own copy of this changelog: `getLatestBlockhash` response together with its context (notably `context.slot`). ### Validator #### Breaking -* XDP transmit is now enabled by default on Linux with zero copy on CPU core 1. Use `--xdp-interface` - to select the XDP interface, `--xdp-cpu-cores` to override the XDP CPU assignment, and - `--disable-xdp-zero-copy` to keep XDP enabled without zero copy. Default validator startup now - requires the XDP capabilities. +* XDP transmit is now enabled by default on Linux in copy mode on CPU core 1. Use + `--xdp-cpu-cores` to override the XDP CPU assignment. Use `--xdp-zero-copy` with + `--xdp-interface` to opt in to zero copy. Default validator startup now requires the XDP + copy-mode capabilities. * The default PoH pinned CPU core is now CPU core 10. Use `--poh-pinned-cpu-core` to override it. #### Deprecations * `--accounts-db-access-storages-method` is now deprecated and a no-op (the `mmap` value was deprecated in v4.0.0; mmap mode has now been removed entirely). The flag is still accepted for backward compatibility, but account storages are always accessed via file I/O. * `--experimental-poh-pinned-cpu-core` is now deprecated. Use `--poh-pinned-cpu-core` instead. -* `--xdp-zero-copy` is now deprecated and a no-op because XDP zero copy is enabled by default. Use - `--disable-xdp-zero-copy` to opt out of zero copy. #### Changes * Turbine shred ingestion now rejects shreds more than half an epoch in the future (previously up to 2 full epochs ahead was accepted). ### CLI diff --git a/docs/src/operations/running-with-af-xdp.md b/docs/src/operations/running-with-af-xdp.md index 76a7b720788..85b4f62c3ce 100644 --- a/docs/src/operations/running-with-af-xdp.md +++ b/docs/src/operations/running-with-af-xdp.md @@ -15,36 +15,30 @@ Before rolling out XDP on a production validator, you should test it on your set * **Performance Gain:** Confirm that performance is improved with the new configuration (e.g. lower CPU usage or higher throughput in Turbine’s retransmit stage). * **Metric Visibility:** Verify that you can observe the retransmit-stage metrics, which show time spent sending shreds, to gauge the impact of XDP on network transmission. -XDP is enabled by default on Linux in Agave. The default XDP configuration uses CPU core 1 and enables zero copy. Zero copy requires an explicit network interface: - -```bash ---xdp-interface -``` - -To use different CPU cores for XDP, pass: +XDP is enabled by default on Linux in Agave. The default XDP configuration uses CPU core 1 and copy mode. To use different CPU cores for XDP, pass: ```bash --xdp-cpu-cores 2 ``` -Zero copy avoids using socket buffers for data, but this is only possible when talking directly to the Network Interface Card (NIC). On NICs or drivers that do not support zero copy, such as the `bnxt_en` driver, keep XDP enabled and pass: +Zero copy avoids using socket buffers for data, but this is only possible when talking directly to the Network Interface Card (NIC). To opt in to zero copy, pass an explicit physical interface: ```bash ---disable-xdp-zero-copy +--xdp-zero-copy --xdp-interface ``` Zero copy cannot be used with a bonded interface itself. When using a bonded network interface, specify the underlying member interface to which the XDP program should be attached: ```bash ---xdp-interface +--xdp-zero-copy --xdp-interface ``` Also note that XDP and PoH *must* be assigned to separate (physical) cores. PoH defaults to CPU core 10, and XDP defaults to CPU core 1. The --poh-pinned-cpu-core N flag can be used to move the PoH thread. -Next, your validator binary will need to have access to a few higher level permissions. With default zero-copy XDP, the validator process requires the CAP_NET_RAW, CAP_NET_ADMIN, CAP_BPF, and CAP_PERFMON capabilities. Passing --disable-xdp-zero-copy avoids the CAP_BPF and CAP_PERFMON requirements. These capabilities can be configured in the systemd service file by setting CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN CAP_BPF CAP_PERFMON under the [Service] section or directly on the binary with the command: +Next, your validator binary will need to have access to a few higher level permissions. With default copy-mode XDP, the validator process requires the CAP_NET_RAW and CAP_NET_ADMIN capabilities. Zero copy additionally requires CAP_BPF and CAP_PERFMON. These capabilities can be configured in the systemd service file by setting CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN under the [Service] section or directly on the binary with the command: ```bash -sudo setcap cap_net_raw,cap_net_admin,cap_bpf,cap_perfmon=p +sudo setcap cap_net_raw,cap_net_admin=p #this command must be run each time the binary is replaced ``` @@ -87,7 +81,7 @@ modinfo bnxt_en | `igb` / Intel I210 | ✅ Works | ✅ Works w/ caveat | caveat: `igb` requires kernel `>= 6.14` for ZC. Field report: I210 on 6.17 enabled ZC but had severe network degradation/high skips, so fall back to non-ZC if unstable. | | `ixgbe` / Intel X540, X550 | ✅ Works | ⚠️ Mixed / unstable | Alessandro guidance for freeze/link-flap cases: start without ZC while `ixgbe` is debugged. Stay tuned! | | `ice` / Intel E800 | ✅ Works | ✅ Works | `ice` supports native XDP and AF_XDP zero-copy. Caveats: XDP is blocked for frame sizes larger than 3KB | -| `bnxt_en` / Broadcom | ✅ Works | ❌ Does not work | `bnxt_en` works with XDP, but pass `--disable-xdp-zero-copy`. Broadcom non-ZC can still be reasonably fast. But please get a non-broadcom NIC | +| `bnxt_en` / Broadcom | ✅ Works | ❌ Does not work | `bnxt_en` works with default copy-mode XDP. Broadcom non-ZC can still be reasonably fast. But please get a non-broadcom NIC | | `tg3` / Broadcom | ❌ No native/driver XDP; generic XDP only at best | ❌ Does not work | Broadcom BCM5720 uses the `tg3` driver. Treat as unsupported for Agave/AF_XDP performance work: no native XDP and no AF_XDP zero-copy. | | `r8169` / Realtek | ❌ No native/driver XDP; generic XDP only at best | ❌ Does not work | Realtek NICs using `r8169` should be treated as unsupported for Agave/AF_XDP performance work: no native XDP and no AF_XDP zero-copy.| | `mlx4_en` / Mellanox ConnectX-3 | ❌ Do not use | ❌ Does not work | Driver is no longer supported. Zero-copy does not work. Do not use. | diff --git a/validator/src/cli.rs b/validator/src/cli.rs index 9e5cba75058..86319a012f9 100644 --- a/validator/src/cli.rs +++ b/validator/src/cli.rs @@ -202,8 +202,8 @@ fn deprecated_arguments() -> Vec { .conflicts_with("disable_xdp") .conflicts_with("disable_xdp_zero_copy") .conflicts_with("xdp_zero_copy") - .help("No-op; XDP zero copy is enabled by default"), - usage_warning: "XDP zero copy is enabled by default. Use --disable-xdp-zero-copy to disable it.", + .help("Enable XDP zero copy. Use --xdp-zero-copy instead"), + replaced_by: "xdp-zero-copy", ); add_arg!( // deprecated in v4.0.0 @@ -214,16 +214,6 @@ fn deprecated_arguments() -> Vec { .help("Controls the TPU connection pool size per remote address"), usage_warning:"This parameter is misleading, avoid setting it", ); - add_arg!( - // deprecated in v4.2.0 - Arg::with_name("xdp_zero_copy") - .long("xdp-zero-copy") - .takes_value(false) - .conflicts_with("disable_xdp") - .conflicts_with("disable_xdp_zero_copy") - .help("No-op; XDP zero copy is enabled by default"), - usage_warning: "XDP zero copy is enabled by default. Use --disable-xdp-zero-copy to disable it.", - ); res } diff --git a/validator/src/commands/run/args.rs b/validator/src/commands/run/args.rs index 3f2dd519a2a..1afd948229c 100644 --- a/validator/src/commands/run/args.rs +++ b/validator/src/commands/run/args.rs @@ -1230,6 +1230,14 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, .conflicts_with("disable_xdp") .help("Disable XDP zero copy while leaving XDP transmit enabled"), ) + .arg( + Arg::with_name("xdp_zero_copy") + .long("xdp-zero-copy") + .takes_value(false) + .conflicts_with("disable_xdp") + .conflicts_with("disable_xdp_zero_copy") + .help("Enable XDP zero copy"), + ) .arg( Arg::with_name("xdp_interface") .long("xdp-interface") diff --git a/validator/src/commands/run/execute.rs b/validator/src/commands/run/execute.rs index fdc1ccdd021..fae8f772127 100644 --- a/validator/src/commands/run/execute.rs +++ b/validator/src/commands/run/execute.rs @@ -135,12 +135,12 @@ fn parse_xdp_transmit_config( let xdp_interface = matches .value_of("xdp_interface") .or_else(|| matches.value_of("experimental_retransmit_xdp_interface")); - let xdp_zero_copy = !matches.is_present("disable_xdp_zero_copy"); + let xdp_zero_copy = matches.is_present("xdp_zero_copy") + || matches.is_present("experimental_retransmit_xdp_zero_copy"); if xdp_zero_copy && xdp_interface.is_none() { return Err(String::from( "XDP zero copy requires an explicit network interface. Use --xdp-interface to \ - select the XDP interface, or --disable-xdp-zero-copy to use XDP without zero \ - copy", + select the XDP interface", )); } let xdp_cpus = matches @@ -1490,20 +1490,36 @@ mod tests { } #[test] - fn default_xdp_config_requires_interface_for_zero_copy() { + fn default_xdp_config_uses_copy_mode_and_default_cpu() { let bind_addresses = BindIpAddrs::default(); + let config = xdp_config_for_args(&[], &bind_addresses).unwrap().unwrap(); - let err = xdp_config_for_args(&[], &bind_addresses).unwrap_err(); + assert_eq!(config.interface, None); + assert_eq!( + config.cpus, + vec![crate::commands::run::args::DEFAULT_XDP_CPU_CORE] + ); + assert!(!config.zero_copy); + } + + #[test] + fn xdp_zero_copy_requires_interface() { + let bind_addresses = BindIpAddrs::default(); + + let err = xdp_config_for_args(&["--xdp-zero-copy"], &bind_addresses).unwrap_err(); assert!(err.contains("--xdp-interface")); - assert!(err.contains("--disable-xdp-zero-copy")); + assert!(!err.contains("--disable-xdp")); } #[test] - fn default_xdp_config_uses_zero_copy_default_cpu_and_configured_interface() { + fn xdp_zero_copy_uses_default_cpu_and_configured_interface() { let bind_addresses = BindIpAddrs::default(); - let config = xdp_config_for_args(&["--xdp-interface", "eth0"], &bind_addresses) - .unwrap() - .unwrap(); + let config = xdp_config_for_args( + &["--xdp-zero-copy", "--xdp-interface", "eth0"], + &bind_addresses, + ) + .unwrap() + .unwrap(); assert_eq!(config.interface.as_deref(), Some("eth0")); assert_eq!( @@ -1514,10 +1530,14 @@ mod tests { } #[test] - fn xdp_zero_copy_accepts_deprecated_interface_arg() { + fn xdp_zero_copy_accepts_deprecated_args() { let bind_addresses = BindIpAddrs::default(); let config = xdp_config_for_args( - &["--experimental-retransmit-xdp-interface", "eth0"], + &[ + "--experimental-retransmit-xdp-zero-copy", + "--experimental-retransmit-xdp-interface", + "eth0", + ], &bind_addresses, ) .unwrap() @@ -1536,7 +1556,7 @@ mod tests { } #[test] - fn xdp_cpu_interface_and_zero_copy_are_configurable() { + fn xdp_cpu_and_interface_are_configurable_in_copy_mode() { let bind_addresses = BindIpAddrs::default(); let config = xdp_config_for_args( &[ From 8595e79ea3bc59604e285c6e9cc15cfd9bb28f28 Mon Sep 17 00:00:00 2001 From: greg Date: Fri, 12 Jun 2026 16:49:33 +0000 Subject: [PATCH 5/9] fix --- validator/src/cli.rs | 1 - validator/src/commands/run/args.rs | 11 ----------- validator/src/commands/run/execute.rs | 24 +----------------------- 3 files changed, 1 insertion(+), 35 deletions(-) diff --git a/validator/src/cli.rs b/validator/src/cli.rs index 86319a012f9..d0b706c0539 100644 --- a/validator/src/cli.rs +++ b/validator/src/cli.rs @@ -200,7 +200,6 @@ fn deprecated_arguments() -> Vec { .long("experimental-retransmit-xdp-zero-copy") .takes_value(false) .conflicts_with("disable_xdp") - .conflicts_with("disable_xdp_zero_copy") .conflicts_with("xdp_zero_copy") .help("Enable XDP zero copy. Use --xdp-zero-copy instead"), replaced_by: "xdp-zero-copy", diff --git a/validator/src/commands/run/args.rs b/validator/src/commands/run/args.rs index 1afd948229c..17b7f821f4c 100644 --- a/validator/src/commands/run/args.rs +++ b/validator/src/commands/run/args.rs @@ -1218,24 +1218,13 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, .conflicts_with("xdp_cpu_cores") .conflicts_with("xdp_interface") .conflicts_with("xdp_zero_copy") - .conflicts_with("disable_xdp_zero_copy") .help("Disable XDP transmit, which is enabled by default"), ) - .arg( - Arg::with_name("disable_xdp_zero_copy") - .long("disable-xdp-zero-copy") - .takes_value(false) - .conflicts_with("experimental_retransmit_xdp_zero_copy") - .conflicts_with("xdp_zero_copy") - .conflicts_with("disable_xdp") - .help("Disable XDP zero copy while leaving XDP transmit enabled"), - ) .arg( Arg::with_name("xdp_zero_copy") .long("xdp-zero-copy") .takes_value(false) .conflicts_with("disable_xdp") - .conflicts_with("disable_xdp_zero_copy") .help("Enable XDP zero copy"), ) .arg( diff --git a/validator/src/commands/run/execute.rs b/validator/src/commands/run/execute.rs index fae8f772127..265519ed46b 100644 --- a/validator/src/commands/run/execute.rs +++ b/validator/src/commands/run/execute.rs @@ -115,7 +115,6 @@ fn parse_xdp_transmit_config( || matches .value_of("experimental_retransmit_xdp_interface") .is_some() - || matches.is_present("disable_xdp_zero_copy") || matches.is_present("xdp_zero_copy") || matches.is_present("experimental_retransmit_xdp_zero_copy"); if xdp_config_requested { @@ -1559,13 +1558,7 @@ mod tests { fn xdp_cpu_and_interface_are_configurable_in_copy_mode() { let bind_addresses = BindIpAddrs::default(); let config = xdp_config_for_args( - &[ - "--xdp-interface", - "eth0", - "--xdp-cpu-cores", - "2-3", - "--disable-xdp-zero-copy", - ], + &["--xdp-interface", "eth0", "--xdp-cpu-cores", "2-3"], &bind_addresses, ) .unwrap() @@ -1576,21 +1569,6 @@ mod tests { assert!(!config.zero_copy); } - #[test] - fn xdp_without_zero_copy_can_infer_interface() { - let bind_addresses = BindIpAddrs::default(); - let config = xdp_config_for_args(&["--disable-xdp-zero-copy"], &bind_addresses) - .unwrap() - .unwrap(); - - assert_eq!(config.interface, None); - assert_eq!( - config.cpus, - vec![crate::commands::run::args::DEFAULT_XDP_CPU_CORE] - ); - assert!(!config.zero_copy); - } - #[test] fn xdp_requires_single_bind_address() { let bind_addresses = BindIpAddrs::new(vec![ From 626e4af62ba6a85ec572c5347ab5461a541f9102 Mon Sep 17 00:00:00 2001 From: greg Date: Fri, 12 Jun 2026 16:59:03 +0000 Subject: [PATCH 6/9] add initialize --- validator/src/commands/run/execute.rs | 32 +++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/validator/src/commands/run/execute.rs b/validator/src/commands/run/execute.rs index 265519ed46b..b6e33b1707c 100644 --- a/validator/src/commands/run/execute.rs +++ b/validator/src/commands/run/execute.rs @@ -83,7 +83,7 @@ use { }, }; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Operation { Initialize, Run, @@ -99,8 +99,9 @@ fn parse_poh_pinned_cpu_core(matches: &ArgMatches) -> Option { fn parse_xdp_transmit_config( matches: &ArgMatches, bind_addresses: &BindIpAddrs, + operation: Operation, ) -> Result, String> { - if matches.is_present("disable_xdp") { + if matches.is_present("disable_xdp") || operation == Operation::Initialize { return Ok(None); } @@ -230,7 +231,7 @@ pub fn execute( } } - let xdp_transmit_config = parse_xdp_transmit_config(matches, &bind_addresses)?; + let xdp_transmit_config = parse_xdp_transmit_config(matches, &bind_addresses, operation)?; let dynamic_port_range = solana_net_utils::parse_port_range(matches.value_of("dynamic_port_range").unwrap()) @@ -1432,11 +1433,19 @@ mod tests { fn xdp_config_for_args( args: &[&str], bind_addresses: &BindIpAddrs, + ) -> Result, String> { + xdp_config_for_args_and_operation(args, bind_addresses, Operation::Run) + } + + fn xdp_config_for_args_and_operation( + args: &[&str], + bind_addresses: &BindIpAddrs, + operation: Operation, ) -> Result, String> { let default_args = cli::DefaultArgs::default(); let matches = cli::app("test", &default_args).get_matches_from([&["agave-validator"], args].concat()); - parse_xdp_transmit_config(&matches, bind_addresses) + parse_xdp_transmit_config(&matches, bind_addresses, operation) } #[test] @@ -1554,6 +1563,21 @@ mod tests { .is_none()); } + #[test] + fn init_returns_no_xdp_config() { + let bind_addresses = BindIpAddrs::default(); + assert!(xdp_config_for_args_and_operation(&[], &bind_addresses, Operation::Initialize) + .unwrap() + .is_none()); + assert!(xdp_config_for_args_and_operation( + &["--xdp-zero-copy"], + &bind_addresses, + Operation::Initialize, + ) + .unwrap() + .is_none()); + } + #[test] fn xdp_cpu_and_interface_are_configurable_in_copy_mode() { let bind_addresses = BindIpAddrs::default(); From aa39b471c0de0befc748147b490f6efeecb03a8a Mon Sep 17 00:00:00 2001 From: greg Date: Fri, 12 Jun 2026 17:49:15 +0000 Subject: [PATCH 7/9] change to --no-xdp --- multinode-demo/bootstrap-validator.sh | 1 + multinode-demo/validator.sh | 1 + scripts/run.sh | 1 + validator/src/cli.rs | 6 +++--- validator/src/commands/run/args.rs | 12 ++++++------ validator/src/commands/run/execute.rs | 16 ++++++++-------- 6 files changed, 20 insertions(+), 17 deletions(-) diff --git a/multinode-demo/bootstrap-validator.sh b/multinode-demo/bootstrap-validator.sh index 6b231383e1d..fa75b51a88d 100755 --- a/multinode-demo/bootstrap-validator.sh +++ b/multinode-demo/bootstrap-validator.sh @@ -144,6 +144,7 @@ args+=( --no-wait-for-vote-to-start-leader --full-rpc-api --allow-private-addr + --no-xdp ) default_arg --gossip-port 8001 default_arg --log - diff --git a/multinode-demo/validator.sh b/multinode-demo/validator.sh index 0d76fab0284..07bd51a86fb 100755 --- a/multinode-demo/validator.sh +++ b/multinode-demo/validator.sh @@ -10,6 +10,7 @@ args=( --max-genesis-archive-unpacked-size 1073741824 --no-poh-speed-test --no-os-network-limits-test + --no-xdp ) airdrops_enabled=1 node_sol=500 # 500 SOL: number of SOL to airdrop the node for transaction fees and vote account rent exemption (ignored if airdrops_enabled=0) diff --git a/scripts/run.sh b/scripts/run.sh index 18f13e339b1..5452a8d4dfc 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -121,6 +121,7 @@ args=( --require-tower --no-wait-for-vote-to-start-leader --no-os-network-limits-test + --no-xdp ) # shellcheck disable=SC2086 agave-validator "${args[@]}" $SOLANA_RUN_SH_VALIDATOR_ARGS & diff --git a/validator/src/cli.rs b/validator/src/cli.rs index d0b706c0539..a910aa7814b 100644 --- a/validator/src/cli.rs +++ b/validator/src/cli.rs @@ -175,7 +175,7 @@ fn deprecated_arguments() -> Vec { .long("experimental-retransmit-xdp-cpu-cores") .takes_value(true) .value_name("CPU_LIST") - .conflicts_with("disable_xdp") + .conflicts_with("no_xdp") .conflicts_with("xdp_cpu_cores") .validator(|value| { validate_cpu_ranges(value, "--experimental-retransmit-xdp-cpu-cores") @@ -189,7 +189,7 @@ fn deprecated_arguments() -> Vec { .long("experimental-retransmit-xdp-interface") .takes_value(true) .value_name("INTERFACE") - .conflicts_with("disable_xdp") + .conflicts_with("no_xdp") .conflicts_with("xdp_interface") .help("Network interface to use for XDP. Use --xdp-interface instead"), replaced_by: "xdp-interface", @@ -199,7 +199,7 @@ fn deprecated_arguments() -> Vec { Arg::with_name("experimental_retransmit_xdp_zero_copy") .long("experimental-retransmit-xdp-zero-copy") .takes_value(false) - .conflicts_with("disable_xdp") + .conflicts_with("no_xdp") .conflicts_with("xdp_zero_copy") .help("Enable XDP zero copy. Use --xdp-zero-copy instead"), replaced_by: "xdp-zero-copy", diff --git a/validator/src/commands/run/args.rs b/validator/src/commands/run/args.rs index 17b7f821f4c..bc79de19a63 100644 --- a/validator/src/commands/run/args.rs +++ b/validator/src/commands/run/args.rs @@ -1209,8 +1209,8 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, .help(DefaultSchedulerPool::cli_message()), ) .arg( - Arg::with_name("disable_xdp") - .long("disable-xdp") + Arg::with_name("no_xdp") + .long("no-xdp") .takes_value(false) .conflicts_with("experimental_retransmit_xdp_cpu_cores") .conflicts_with("experimental_retransmit_xdp_interface") @@ -1218,13 +1218,13 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, .conflicts_with("xdp_cpu_cores") .conflicts_with("xdp_interface") .conflicts_with("xdp_zero_copy") - .help("Disable XDP transmit, which is enabled by default"), + .help("Do not use XDP transmit"), ) .arg( Arg::with_name("xdp_zero_copy") .long("xdp-zero-copy") .takes_value(false) - .conflicts_with("disable_xdp") + .conflicts_with("no_xdp") .help("Enable XDP zero copy"), ) .arg( @@ -1232,7 +1232,7 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, .long("xdp-interface") .takes_value(true) .value_name("INTERFACE") - .conflicts_with("disable_xdp") + .conflicts_with("no_xdp") .help("Network interface to use for XDP. Required when XDP zero copy is enabled"), ) .arg( @@ -1240,7 +1240,7 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, .long("xdp-cpu-cores") .takes_value(true) .value_name("CPU_LIST") - .conflicts_with("disable_xdp") + .conflicts_with("no_xdp") .validator(|value| validate_cpu_ranges(value, "--xdp-cpu-cores")) .help("Use the specified CPU cores for XDP. Defaults to CPU core 1"), ) diff --git a/validator/src/commands/run/execute.rs b/validator/src/commands/run/execute.rs index b6e33b1707c..4bdbba47276 100644 --- a/validator/src/commands/run/execute.rs +++ b/validator/src/commands/run/execute.rs @@ -101,7 +101,7 @@ fn parse_xdp_transmit_config( bind_addresses: &BindIpAddrs, operation: Operation, ) -> Result, String> { - if matches.is_present("disable_xdp") || operation == Operation::Initialize { + if matches.is_present("no_xdp") || operation == Operation::Initialize { return Ok(None); } @@ -1516,7 +1516,7 @@ mod tests { let err = xdp_config_for_args(&["--xdp-zero-copy"], &bind_addresses).unwrap_err(); assert!(err.contains("--xdp-interface")); - assert!(!err.contains("--disable-xdp")); + assert!(!err.contains("--no-xdp")); } #[test] @@ -1556,9 +1556,9 @@ mod tests { } #[test] - fn disable_xdp_returns_no_config() { + fn no_xdp_returns_no_config() { let bind_addresses = BindIpAddrs::default(); - assert!(xdp_config_for_args(&["--disable-xdp"], &bind_addresses) + assert!(xdp_config_for_args(&["--no-xdp"], &bind_addresses) .unwrap() .is_none()); } @@ -1603,18 +1603,18 @@ mod tests { let err = xdp_config_for_args(&[], &bind_addresses).unwrap_err(); assert!(err.contains("multihoming")); - assert!(!err.contains("--disable-xdp")); - assert!(xdp_config_for_args(&["--disable-xdp"], &bind_addresses) + assert!(!err.contains("--no-xdp")); + assert!(xdp_config_for_args(&["--no-xdp"], &bind_addresses) .unwrap() .is_none()); } #[test] - fn disable_xdp_conflicts_with_xdp_overrides() { + fn no_xdp_conflicts_with_xdp_overrides() { let default_args = cli::DefaultArgs::default(); let matches = cli::app("test", &default_args).get_matches_from_safe(vec![ "agave-validator", - "--disable-xdp", + "--no-xdp", "--xdp-cpu-cores", "2", ]); From 32a35f2556999f1fcfd05c4b195530ace78ddb0d Mon Sep 17 00:00:00 2001 From: greg Date: Fri, 12 Jun 2026 23:28:15 +0000 Subject: [PATCH 8/9] integrate with cpu utils --- poh/src/poh_service.rs | 6 +- validator/src/cli.rs | 4 +- validator/src/commands/run/args.rs | 14 +- validator/src/commands/run/execute.rs | 283 +++++++++++++++++++++----- xdp/src/tx_loop.rs | 4 +- 5 files changed, 251 insertions(+), 60 deletions(-) diff --git a/poh/src/poh_service.rs b/poh/src/poh_service.rs index 06d12835f22..82cb74913a6 100644 --- a/poh/src/poh_service.rs +++ b/poh/src/poh_service.rs @@ -154,12 +154,12 @@ impl PohService { #[cfg(target_os = "linux")] if let Some(pinned_cpu_core) = pinned_cpu_core { // PoH service runs in a tight loop, generating hashes as fast as possible. - // Let's dedicate one of the CPU cores to this thread so that it can gain - // from cache performance. + // Dedicate one CPU core to this thread for cache performance. let pinned_cpu = CpuId::new(pinned_cpu_core).unwrap(); + info!("Pinning PoH service to CPU core {pinned_cpu_core}"); set_cpu_affinity(None, [pinned_cpu]).unwrap_or_else(|e| { panic!( - "Failed to set CPU affinity for POH service to CPU \ + "Failed to set CPU affinity for PoH service to CPU \ {pinned_cpu_core}: {e:?}. This is critical for performance." ) }); diff --git a/validator/src/cli.rs b/validator/src/cli.rs index a910aa7814b..0158c57266e 100644 --- a/validator/src/cli.rs +++ b/validator/src/cli.rs @@ -163,9 +163,9 @@ fn deprecated_arguments() -> Vec { Arg::with_name("experimental_poh_pinned_cpu_core") .long("experimental-poh-pinned-cpu-core") .takes_value(true) - .value_name("CPU_CORE_INDEX") + .value_name("CPU_ID") .conflicts_with("poh_pinned_cpu_core") - .validator(|s| usize::from_str(&s).map(|_| ()).map_err(|e| e.to_string())) + .validator(is_parsable::) .help("Specify which CPU core PoH is pinned to. Use --poh-pinned-cpu-core instead"), replaced_by: "poh-pinned-cpu-core", ); diff --git a/validator/src/commands/run/args.rs b/validator/src/commands/run/args.rs index bc79de19a63..e6725f4e560 100644 --- a/validator/src/commands/run/args.rs +++ b/validator/src/commands/run/args.rs @@ -30,12 +30,11 @@ use { solana_send_transaction_service::send_transaction_service::Config as SendTransactionServiceConfig, solana_signer::Signer, solana_unified_scheduler_pool::DefaultSchedulerPool, - std::{collections::HashSet, net::SocketAddr, path::PathBuf, str::FromStr}, + std::{collections::HashSet, net::SocketAddr, path::PathBuf}, }; const EXCLUDE_KEY: &str = "account-index-exclude-key"; const INCLUDE_KEY: &str = "account-index-include-key"; -pub const DEFAULT_XDP_CPU_CORE: usize = 1; pub mod account_secondary_indexes; pub mod blockstore_options; @@ -867,9 +866,9 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, Arg::with_name("poh_pinned_cpu_core") .long("poh-pinned-cpu-core") .takes_value(true) - .value_name("CPU_CORE_INDEX") - .validator(|s| usize::from_str(&s).map(|_| ()).map_err(|e| e.to_string())) - .help("Specify which CPU core PoH is pinned to"), + .value_name("CPU_ID") + .validator(is_parsable::) + .help("Specify which CPU core PoH is pinned to. Defaults to CPU 10 on Linux"), ) .arg( Arg::with_name("poh_hashes_per_batch") @@ -1242,7 +1241,10 @@ pub fn add_args<'a>(app: App<'a, 'a>, default_args: &'a DefaultArgs) -> App<'a, .value_name("CPU_LIST") .conflicts_with("no_xdp") .validator(|value| validate_cpu_ranges(value, "--xdp-cpu-cores")) - .help("Use the specified CPU cores for XDP. Defaults to CPU core 1"), + .help( + "Use the specified CPU cores for XDP. Defaults to an auto-selected CPU on a \ + physical core separate from PoH", + ), ) .args(&pub_sub_config::args(/*test_validator:*/ false)) .args(&json_rpc_config::args()) diff --git a/validator/src/commands/run/execute.rs b/validator/src/commands/run/execute.rs index 4bdbba47276..5ddb1e3a721 100644 --- a/validator/src/commands/run/execute.rs +++ b/validator/src/commands/run/execute.rs @@ -1,3 +1,5 @@ +#[cfg(target_os = "linux")] +use agave_cpu_utils::{CpuId, cpu_affinity, set_cpu_affinity}; use { crate::{ admin_rpc_service::{self, StakedNodesOverrides, load_staked_nodes_overrides}, @@ -89,17 +91,25 @@ pub enum Operation { Run, } -#[cfg(target_os = "linux")] fn parse_poh_pinned_cpu_core(matches: &ArgMatches) -> Option { - value_of(matches, "poh_pinned_cpu_core") - .or_else(|| value_of(matches, "experimental_poh_pinned_cpu_core")) - .or(poh_service::DEFAULT_PINNED_CPU_CORE) + #[cfg(target_os = "linux")] + { + value_of(matches, "poh_pinned_cpu_core") + .or_else(|| value_of(matches, "experimental_poh_pinned_cpu_core")) + .or(poh_service::DEFAULT_PINNED_CPU_CORE) + } + #[cfg(not(target_os = "linux"))] + { + let _ = matches; + None + } } fn parse_xdp_transmit_config( matches: &ArgMatches, bind_addresses: &BindIpAddrs, operation: Operation, + poh_pinned_cpu_core: Option, ) -> Result, String> { if matches.is_present("no_xdp") || operation == Operation::Initialize { return Ok(None); @@ -107,7 +117,7 @@ fn parse_xdp_transmit_config( #[cfg(not(target_os = "linux"))] { - let _ = bind_addresses; + let _ = (bind_addresses, poh_pinned_cpu_core); let xdp_config_requested = matches.value_of("xdp_cpu_cores").is_some() || matches .value_of("experimental_retransmit_xdp_cpu_cores") @@ -126,10 +136,11 @@ fn parse_xdp_transmit_config( #[cfg(target_os = "linux")] { + let poh_pinned_cpu_core = poh_pinned_cpu_core.ok_or_else(|| { + String::from("XDP requires PoH to be pinned to a CPU core") + })?; if bind_addresses.len() > 1 { - return Err(String::from( - "XDP cannot be used in a multihoming context", - )); + return Err(String::from("XDP cannot be used in a multihoming context")); } let xdp_interface = matches @@ -143,20 +154,120 @@ fn parse_xdp_transmit_config( select the XDP interface", )); } - let xdp_cpus = matches + let xdp_cpu_ranges = matches .value_of("xdp_cpu_cores") - .or_else(|| matches.value_of("experimental_retransmit_xdp_cpu_cores")) - .map(|cpu_ranges| { - solana_clap_utils::input_parsers::parse_cpu_ranges(cpu_ranges) - .map_err(|err| err.to_string()) - }) - .transpose()? - .unwrap_or_else(|| vec![crate::commands::run::args::DEFAULT_XDP_CPU_CORE]); + .or_else(|| matches.value_of("experimental_retransmit_xdp_cpu_cores")); + let xdp_cpus = if let Some(cpu_ranges) = xdp_cpu_ranges { + let cpus = solana_clap_utils::input_parsers::parse_cpu_ranges(cpu_ranges) + .map_err(|err| err.to_string())?; + validate_xdp_cpus(&cpus, poh_pinned_cpu_core)?; + cpus + } else { + let allowed_cpus = cpu_affinity(None) + .map_err(|err| { + format!( + "failed to query CPU affinity for XDP CPU selection: {err}. \ + Provide --xdp-cpu-cores explicitly" + ) + })? + .iter() + .map(|cpu| **cpu) + .collect::>(); + vec![select_default_xdp_cpu( + &allowed_cpus, + poh_pinned_cpu_core, + read_thread_siblings_list, + )?] + }; + info!("XDP enabled on CPU cores: {xdp_cpus:?}"); Ok(Some(XdpConfig::new(xdp_interface, xdp_cpus, xdp_zero_copy))) } } +#[cfg(target_os = "linux")] +fn validate_xdp_cpus(cpus: &[usize], poh_pinned_cpu_core: usize) -> Result<(), String> { + for cpu in cpus { + CpuId::new(*cpu).map_err(|err| format!("invalid XDP CPU core {cpu}: {err}"))?; + } + validate_xdp_cpus_are_separate_from_poh_physical_core( + cpus, + poh_pinned_cpu_core, + read_thread_siblings_list, + ) +} + +#[cfg(target_os = "linux")] +fn read_thread_siblings_list(cpu: usize) -> Result, String> { + let path = Path::new("/sys/devices/system/cpu") + .join(format!("cpu{cpu}")) + .join("topology/thread_siblings_list"); + let cpu_ranges = fs::read_to_string(&path) + .map_err(|err| format!("failed to read {}: {err}", path.display()))?; + solana_clap_utils::input_parsers::parse_cpu_ranges(cpu_ranges.trim()) + .map_err(|err| format!("failed to parse {}: {err}", path.display())) +} + +#[cfg(target_os = "linux")] +fn validate_xdp_cpus_are_separate_from_poh_physical_core( + cpus: &[usize], + poh_pinned_cpu_core: usize, + thread_siblings: F, +) -> Result<(), String> +where + F: Fn(usize) -> Result, String>, +{ + for cpu in cpus { + if cpu_shares_physical_core_with_poh(*cpu, poh_pinned_cpu_core, &thread_siblings)? { + return Err(format!( + "XDP CPU core {cpu} shares a physical core with PoH CPU core \ + {poh_pinned_cpu_core}; provide --xdp-cpu-cores with CPU cores on separate \ + physical cores" + )); + } + } + Ok(()) +} + +#[cfg(target_os = "linux")] +fn select_default_xdp_cpu( + allowed_cpus: &[usize], + poh_pinned_cpu_core: usize, + thread_siblings: F, +) -> Result +where + F: Fn(usize) -> Result, String>, +{ + CpuId::new(poh_pinned_cpu_core) + .map_err(|err| format!("invalid PoH CPU core {poh_pinned_cpu_core}: {err}"))?; + for cpu in allowed_cpus.iter().rev().copied() { + CpuId::new(cpu).map_err(|err| format!("invalid allowed CPU core {cpu}: {err}"))?; + if !cpu_shares_physical_core_with_poh(cpu, poh_pinned_cpu_core, &thread_siblings)? { + return Ok(cpu); + } + } + + Err(format!( + "XDP requires an available CPU core on a physical core separate from PoH CPU core \ + {poh_pinned_cpu_core}; provide --xdp-cpu-cores explicitly" + )) +} + +#[cfg(target_os = "linux")] +fn cpu_shares_physical_core_with_poh( + cpu: usize, + poh_pinned_cpu_core: usize, + thread_siblings: &F, +) -> Result +where + F: Fn(usize) -> Result, String>, +{ + if cpu == poh_pinned_cpu_core { + return Ok(true); + } + Ok(thread_siblings(cpu)?.contains(&poh_pinned_cpu_core)) +} + pub fn execute( matches: &ArgMatches, solana_version: &str, @@ -231,7 +342,14 @@ pub fn execute( } } - let xdp_transmit_config = parse_xdp_transmit_config(matches, &bind_addresses, operation)?; + let poh_pinned_cpu_core = parse_poh_pinned_cpu_core(matches); + if let Some(poh_pinned_cpu_core) = poh_pinned_cpu_core { + info!("PoH pinned CPU core: {poh_pinned_cpu_core}"); + } else { + info!("PoH is not pinned to a CPU core"); + } + let xdp_transmit_config = + parse_xdp_transmit_config(matches, &bind_addresses, operation, poh_pinned_cpu_core)?; let dynamic_port_range = solana_net_utils::parse_port_range(matches.value_of("dynamic_port_range").unwrap()) @@ -471,10 +589,27 @@ pub fn execute( let (xdp_transmit_setup, xdp_network_config_report) = (None, None); #[cfg(target_os = "linux")] - let poh_pinned_cpu_core = parse_poh_pinned_cpu_core(matches); - - #[cfg(not(target_os = "linux"))] - let poh_pinned_cpu_core = None; + { + let reserved = xdp_transmit_config + .as_ref() + .map(|xdp| xdp.cpus.clone()) + .unwrap_or_default() + .into_iter() + .map(CpuId::new) + .collect::>>()?; + if !reserved.is_empty() { + let available = cpu_affinity(None)? + .into_iter() + .filter(|cpu| !reserved.contains(cpu)) + .collect::>(); + if available.is_empty() { + Err(String::from( + "XDP reserved all available CPU cores; no CPU available for the validator main thread", + ))?; + } + set_cpu_affinity(None, available.iter().copied())?; + } + } solana_core::validator::report_target_features(); @@ -1445,7 +1580,8 @@ mod tests { let default_args = cli::DefaultArgs::default(); let matches = cli::app("test", &default_args).get_matches_from([&["agave-validator"], args].concat()); - parse_xdp_transmit_config(&matches, bind_addresses, operation) + let poh_pinned_cpu_core = parse_poh_pinned_cpu_core(&matches); + parse_xdp_transmit_config(&matches, bind_addresses, operation, poh_pinned_cpu_core) } #[test] @@ -1498,15 +1634,13 @@ mod tests { } #[test] - fn default_xdp_config_uses_copy_mode_and_default_cpu() { + fn default_xdp_config_uses_copy_mode_and_auto_selected_cpu() { let bind_addresses = BindIpAddrs::default(); let config = xdp_config_for_args(&[], &bind_addresses).unwrap().unwrap(); assert_eq!(config.interface, None); - assert_eq!( - config.cpus, - vec![crate::commands::run::args::DEFAULT_XDP_CPU_CORE] - ); + assert_eq!(config.cpus.len(), 1); + assert_ne!(Some(config.cpus[0]), poh_service::DEFAULT_PINNED_CPU_CORE); assert!(!config.zero_copy); } @@ -1530,10 +1664,8 @@ mod tests { .unwrap(); assert_eq!(config.interface.as_deref(), Some("eth0")); - assert_eq!( - config.cpus, - vec![crate::commands::run::args::DEFAULT_XDP_CPU_CORE] - ); + assert_eq!(config.cpus.len(), 1); + assert_ne!(Some(config.cpus[0]), poh_service::DEFAULT_PINNED_CPU_CORE); assert!(config.zero_copy); } @@ -1558,31 +1690,44 @@ mod tests { #[test] fn no_xdp_returns_no_config() { let bind_addresses = BindIpAddrs::default(); - assert!(xdp_config_for_args(&["--no-xdp"], &bind_addresses) - .unwrap() - .is_none()); + assert!( + xdp_config_for_args(&["--no-xdp"], &bind_addresses) + .unwrap() + .is_none() + ); } #[test] fn init_returns_no_xdp_config() { let bind_addresses = BindIpAddrs::default(); - assert!(xdp_config_for_args_and_operation(&[], &bind_addresses, Operation::Initialize) + assert!( + xdp_config_for_args_and_operation(&[], &bind_addresses, Operation::Initialize) + .unwrap() + .is_none() + ); + assert!( + xdp_config_for_args_and_operation( + &["--xdp-zero-copy"], + &bind_addresses, + Operation::Initialize, + ) .unwrap() - .is_none()); - assert!(xdp_config_for_args_and_operation( - &["--xdp-zero-copy"], - &bind_addresses, - Operation::Initialize, - ) - .unwrap() - .is_none()); + .is_none() + ); } #[test] fn xdp_cpu_and_interface_are_configurable_in_copy_mode() { let bind_addresses = BindIpAddrs::default(); let config = xdp_config_for_args( - &["--xdp-interface", "eth0", "--xdp-cpu-cores", "2-3"], + &[ + "--poh-pinned-cpu-core", + "1023", + "--xdp-interface", + "eth0", + "--xdp-cpu-cores", + "2-3", + ], &bind_addresses, ) .unwrap() @@ -1604,9 +1749,11 @@ mod tests { let err = xdp_config_for_args(&[], &bind_addresses).unwrap_err(); assert!(err.contains("multihoming")); assert!(!err.contains("--no-xdp")); - assert!(xdp_config_for_args(&["--no-xdp"], &bind_addresses) - .unwrap() - .is_none()); + assert!( + xdp_config_for_args(&["--no-xdp"], &bind_addresses) + .unwrap() + .is_none() + ); } #[test] @@ -1621,4 +1768,46 @@ mod tests { assert!(matches.is_err()); } + + fn test_thread_siblings(cpu: usize) -> Result, String> { + Ok(match cpu { + 2 | 10 => vec![2, 10], + 3 | 11 => vec![3, 11], + _ => vec![cpu], + }) + } + + #[test] + fn default_xdp_cpu_skips_poh_physical_core() { + assert_eq!( + select_default_xdp_cpu(&[3, 2], 10, test_thread_siblings), + Ok(3) + ); + } + + #[test] + fn default_xdp_cpu_errors_without_separate_physical_core() { + let err = select_default_xdp_cpu(&[2, 10], 10, test_thread_siblings).unwrap_err(); + assert!(err.contains("physical core separate from PoH")); + assert!(err.contains("--xdp-cpu-cores")); + assert!(!err.contains("--no-xdp")); + } + + #[test] + fn explicit_xdp_cpu_rejects_poh_physical_core() { + let err = + validate_xdp_cpus_are_separate_from_poh_physical_core(&[2], 10, test_thread_siblings) + .unwrap_err(); + assert!(err.contains("shares a physical core")); + assert!(err.contains("--xdp-cpu-cores")); + assert!(!err.contains("--no-xdp")); + } + + #[test] + fn explicit_xdp_cpu_accepts_separate_physical_core() { + assert!( + validate_xdp_cpus_are_separate_from_poh_physical_core(&[3], 10, test_thread_siblings,) + .is_ok() + ); + } } diff --git a/xdp/src/tx_loop.rs b/xdp/src/tx_loop.rs index 80f383628c6..7f9b62f142c 100644 --- a/xdp/src/tx_loop.rs +++ b/xdp/src/tx_loop.rs @@ -17,7 +17,7 @@ use { socket::{Socket, Tx, TxRing}, umem::{Frame, OwnedUmem, PageAlignedMemory, Umem}, }, - agave_cpu_utils::set_cpu_affinity, + agave_cpu_utils::{CpuId, set_cpu_affinity}, crossbeam_channel::{Receiver, Sender, TryRecvError}, libc::{_SC_PAGESIZE, sysconf}, std::{ @@ -240,7 +240,7 @@ impl TxLoop { } = self; // each queue is bound to its own CPU core - set_cpu_affinity(None, [agave_cpu_utils::CpuId::new(cpu_id).unwrap()]).unwrap(); + set_cpu_affinity(None, [CpuId::new(cpu_id).unwrap()]).unwrap(); let umem = socket.umem(); let umem_tx_capacity = umem.available(); From 3431505e16fa47ecae24d154fd31dd8e9f5d8f9a Mon Sep 17 00:00:00 2001 From: greg Date: Sun, 14 Jun 2026 09:25:52 +0000 Subject: [PATCH 9/9] gate linux --- validator/src/commands/run/execute.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/validator/src/commands/run/execute.rs b/validator/src/commands/run/execute.rs index 5ddb1e3a721..fe153c2fc9b 100644 --- a/validator/src/commands/run/execute.rs +++ b/validator/src/commands/run/execute.rs @@ -343,10 +343,13 @@ pub fn execute( } let poh_pinned_cpu_core = parse_poh_pinned_cpu_core(matches); - if let Some(poh_pinned_cpu_core) = poh_pinned_cpu_core { - info!("PoH pinned CPU core: {poh_pinned_cpu_core}"); - } else { - info!("PoH is not pinned to a CPU core"); + #[cfg(target_os = "linux")] + { + if let Some(poh_pinned_cpu_core) = poh_pinned_cpu_core { + info!("PoH pinned CPU core: {poh_pinned_cpu_core}"); + } else { + info!("PoH is not pinned to a CPU core"); + } } let xdp_transmit_config = parse_xdp_transmit_config(matches, &bind_addresses, operation, poh_pinned_cpu_core)?;