Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/cloudai/models/scenario.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
# Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -108,7 +108,7 @@ def tdef_model_dump(self, by_alias: bool) -> dict:
"extra_env_vars": self.extra_env_vars if self.extra_env_vars else None,
"cmd_args": self.cmd_args.model_dump(by_alias=by_alias) if self.cmd_args else None,
"git_repos": [repo.model_dump() for repo in self.git_repos] if self.git_repos else None,
"nsys": self.nsys.model_dump() if self.nsys else None,
"nsys": self.nsys.model_dump(exclude_unset=True) if self.nsys else None,
}
return {k: v for k, v in data.items() if v is not None}

Expand Down
159 changes: 158 additions & 1 deletion tests/test_test_scenario.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
# Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -515,3 +515,160 @@ def test_get_reporters_nccl(self):
assert len(reporters) == 2
assert NcclTestPerformanceReportGenerationStrategy in reporters
assert NcclTestPredictionReportGenerationStrategy in reporters


class TestNsysMerging:
    """Checks how a scenario-level ``[Tests.nsys]`` table combines with a test's own nsys settings.

    Each case registers an NCCL test definition in the parser's ``test_mapping``,
    validates a single-test scenario written in TOML, and inspects the definition
    returned by ``_prepare_tdef``.
    """

    @staticmethod
    def _nccl_tdef(**extra) -> NCCLTestDefinition:
        """Build the base NCCL definition shared by all cases.

        ``extra`` is forwarded as-is, so a field that is not passed here is not
        passed to the constructor either.
        """
        return NCCLTestDefinition(
            name="nccl",
            description="desc",
            test_template_name="NcclTest",
            cmd_args=NCCLCmdArgs(docker_image_url="fake://url/nccl"),
            **extra,
        )

    @staticmethod
    def _resolve_first_test(parser: TestScenarioParser, scenario_toml: str):
        """Validate ``scenario_toml`` and run its first test entry through ``parser._prepare_tdef``."""
        scenario = TestScenarioModel.model_validate(toml.loads(scenario_toml))
        return parser._prepare_tdef(scenario.tests[0])

    def test_nsys_partial_override_preserves_base_config(
        self, test_scenario_parser: TestScenarioParser, slurm_system: SlurmSystem
    ):
        """Overriding only ``output`` in the scenario keeps the other base nsys fields."""
        from cloudai.core import NsysConfiguration

        base_nsys = NsysConfiguration(
            enable=True,
            nsys_binary="/custom/nsys",
            output="/base/output",
            trace="cuda,nvtx",
            sample="cpu",
        )
        test_scenario_parser.test_mapping = {"nccl": self._nccl_tdef(nsys=base_nsys)}

        scenario_toml = """
name = "test"

[[Tests]]
id = "1"
test_name = "nccl"

[Tests.nsys]
output = "/scenario/output"
"""
        merged = self._resolve_first_test(test_scenario_parser, scenario_toml)

        assert merged.nsys is not None
        nsys = merged.nsys
        # Only the overridden field changes ...
        assert nsys.output == "/scenario/output"
        # ... everything else still comes from the base definition.
        assert nsys.nsys_binary == "/custom/nsys"
        assert nsys.trace == "cuda,nvtx"
        assert nsys.sample == "cpu"
        assert nsys.enable is True

    def test_nsys_multiple_fields_override(self, test_scenario_parser: TestScenarioParser, slurm_system: SlurmSystem):
        """Several scenario-level fields can be overridden at once; the rest stay as in the base."""
        from cloudai.core import NsysConfiguration

        base_nsys = NsysConfiguration(
            enable=True,
            nsys_binary="/base/nsys",
            output="/base/output",
            trace="cuda",
            force_overwrite=False,
        )
        test_scenario_parser.test_mapping = {"nccl": self._nccl_tdef(nsys=base_nsys)}

        scenario_toml = """
name = "test"

[[Tests]]
id = "1"
test_name = "nccl"

[Tests.nsys]
output = "/new/output"
force_overwrite = true
"""
        merged = self._resolve_first_test(test_scenario_parser, scenario_toml)

        assert merged.nsys is not None
        nsys = merged.nsys
        # Both fields named in the scenario are replaced.
        assert nsys.output == "/new/output"
        assert nsys.force_overwrite is True
        # Fields the scenario does not mention keep their base values.
        assert nsys.nsys_binary == "/base/nsys"
        assert nsys.trace == "cuda"
        assert nsys.enable is True

    def test_nsys_scenario_adds_to_base_without_nsys(
        self, test_scenario_parser: TestScenarioParser, slurm_system: SlurmSystem
    ):
        """A scenario may introduce nsys settings when the base definition has none."""
        # No nsys in base config: the field is omitted entirely, not passed as None.
        test_scenario_parser.test_mapping = {"nccl": self._nccl_tdef()}

        scenario_toml = """
name = "test"

[[Tests]]
id = "1"
test_name = "nccl"

[Tests.nsys]
output = "/scenario/output"
trace = "cuda,nvtx"
"""
        merged = self._resolve_first_test(test_scenario_parser, scenario_toml)

        assert merged.nsys is not None
        nsys = merged.nsys
        assert nsys.output == "/scenario/output"
        assert nsys.trace == "cuda,nvtx"
        # Fields not given anywhere end up with these values
        # (presumably the NsysConfiguration defaults - confirm against the model).
        assert nsys.enable is True
        assert nsys.nsys_binary == "nsys"

    def test_nsys_disable_override(self, test_scenario_parser: TestScenarioParser, slurm_system: SlurmSystem):
        """Setting ``enable = false`` in the scenario turns nsys off but keeps the base ``output``."""
        from cloudai.core import NsysConfiguration

        base_nsys = NsysConfiguration(
            enable=True,
            output="/base/output",
        )
        test_scenario_parser.test_mapping = {"nccl": self._nccl_tdef(nsys=base_nsys)}

        scenario_toml = """
name = "test"

[[Tests]]
id = "1"
test_name = "nccl"

[Tests.nsys]
enable = false
"""
        merged = self._resolve_first_test(test_scenario_parser, scenario_toml)

        assert merged.nsys is not None
        nsys = merged.nsys
        assert nsys.enable is False
        assert nsys.output == "/base/output"