diff --git a/scripts/performance/configs/nemotronh/nemotron_3_workload_base_configs.py b/scripts/performance/configs/nemotronh/nemotron_3_workload_base_configs.py
index 2cafda9789..18ed8d8096 100644
--- a/scripts/performance/configs/nemotronh/nemotron_3_workload_base_configs.py
+++ b/scripts/performance/configs/nemotronh/nemotron_3_workload_base_configs.py
@@ -41,7 +41,7 @@
 
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1 = replace(
     BASE_NEMOTRON_3_NANO_CONFIG,
-    micro_batch_size=4,
+    micro_batch_size=2,
 )
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_NVFP4_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1
@@ -59,7 +59,7 @@
 
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1 = replace(
     BASE_NEMOTRON_3_NANO_CONFIG,
-    micro_batch_size=4,
+    micro_batch_size=2,
 )
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_NVFP4_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1
diff --git a/scripts/performance/configs/qwen/qwen3_workload_base_configs.py b/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
index 9cb42e6722..d9397fc7c9 100644
--- a/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
+++ b/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
@@ -325,7 +325,7 @@
 QWEN3_30B_A3B_PRETRAIN_CONFIG_GB300_BF16_V1 = replace(
     BASE_QWEN3_30B_A3B_CONFIG,
     num_gpus=8,
-    micro_batch_size=8,
+    micro_batch_size=4,
     moe_flex_dispatcher_backend="hybridep",
     cuda_graph_impl="transformer_engine",
     cuda_graph_scope=["moe_router", "moe_preprocess"],
@@ -335,7 +335,7 @@
 QWEN3_30B_A3B_PRETRAIN_CONFIG_GB300_FP8_CS_V1 = replace(
     BASE_QWEN3_30B_A3B_CONFIG,
     num_gpus=8,
-    micro_batch_size=8,
+    micro_batch_size=4,
     moe_flex_dispatcher_backend="hybridep",
     cuda_graph_impl="transformer_engine",
     cuda_graph_scope=["moe_router", "moe_preprocess"],
@@ -382,7 +382,7 @@
 QWEN3_30B_A3B_PRETRAIN_CONFIG_B300_BF16_V1 = replace(
     BASE_QWEN3_30B_A3B_CONFIG,
     num_gpus=8,
-    micro_batch_size=8,
+    micro_batch_size=4,
     moe_flex_dispatcher_backend="hybridep",
     cuda_graph_impl="transformer_engine",
     cuda_graph_scope=["moe_router", "moe_preprocess"],
@@ -392,7 +392,7 @@
 QWEN3_30B_A3B_PRETRAIN_CONFIG_B300_FP8_CS_V1 = replace(
     BASE_QWEN3_30B_A3B_CONFIG,
     num_gpus=8,
-    micro_batch_size=8,
+    micro_batch_size=4,
     moe_flex_dispatcher_backend="hybridep",
     cuda_graph_impl="transformer_engine",
     cuda_graph_scope=["moe_router", "moe_preprocess"],
diff --git a/scripts/performance/configs/qwen_vl/qwen3_vl_workload_base_configs.py b/scripts/performance/configs/qwen_vl/qwen3_vl_workload_base_configs.py
index e81cef05f4..07e9934d9a 100644
--- a/scripts/performance/configs/qwen_vl/qwen3_vl_workload_base_configs.py
+++ b/scripts/performance/configs/qwen_vl/qwen3_vl_workload_base_configs.py
@@ -144,7 +144,7 @@
 QWEN3_VL_30B_A3B_PRETRAIN_CONFIG_GB300_BF16 = replace(
     BASE_QWEN3_VL_30B_A3B_CONFIG,
     num_gpus=8,
-    micro_batch_size=8,
+    micro_batch_size=4,
     moe_flex_dispatcher_backend="hybridep",
     cuda_graph_impl="transformer_engine",
     cuda_graph_scope=["moe_router", "moe_preprocess"],
@@ -154,7 +154,7 @@
 QWEN3_VL_30B_A3B_PRETRAIN_CONFIG_GB300_FP8_CS = replace(
     BASE_QWEN3_VL_30B_A3B_CONFIG,
     num_gpus=8,
-    micro_batch_size=8,
+    micro_batch_size=4,
     moe_flex_dispatcher_backend="hybridep",
     cuda_graph_impl="transformer_engine",
     cuda_graph_scope=["moe_router", "moe_preprocess"],