From 5e0ee6989b360bb99be648cee41be15421338f97 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Thu, 15 Jan 2026 22:09:33 +0800 Subject: [PATCH 01/20] add eval_backend_perf --- graph_net_bench/torch/eval_backend_diff.py | 419 +++++---------------- graph_net_bench/torch/eval_backend_perf.py | 337 +++++++++++++++++ graph_net_bench/torch/utils.py | 11 + test/eval_backend_diff_test.sh | 13 +- 4 files changed, 458 insertions(+), 322 deletions(-) create mode 100644 graph_net_bench/torch/eval_backend_perf.py diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index 8488b71b7..07a19ff88 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -1,103 +1,18 @@ from . import utils import subprocess import argparse -import importlib.util import torch -from pathlib import Path -from typing import Type import sys import os import os.path import traceback import json -import random -import numpy as np -import platform import base64 -from graph_net_bench.torch.backend.graph_compiler_backend import GraphCompilerBackend -from graph_net_bench.torch.backend.tvm_backend import TvmBackend -from graph_net_bench.torch.backend.xla_backend import XlaBackend -from graph_net_bench.torch.backend.inductor_backend import InductorBackend -from graph_net_bench.torch.backend.tensorrt_backend import TensorRTBackend -from graph_net_bench.torch.backend.blade_disc_backend import BladeDISCBackend -from graph_net_bench.torch.backend.nope_backend import NopeBackend -from graph_net_bench.torch.backend.pass_mgr_backend import PassMgrBackend -from graph_net_bench.torch.backend.unstable_to_stable_backend import ( - UnstableToStableBackend, -) -from graph_net_bench.torch.backend.range_decomposer_validator_backend import ( - RangeDecomposerValidatorBackend, -) -from graph_net_bench.torch.backend.graph_variable_renamer_validator_backend import ( - GraphVariableRenamerValidatorBackend, -) +import types from graph_net_bench import test_compiler_util from graph_net_bench import path_utils -compiler_backend_name2class = { - "tvm": TvmBackend, - "xla": XlaBackend, - "inductor": InductorBackend, - "tensorrt": TensorRTBackend, - "bladedisc": BladeDISCBackend, - "nope": NopeBackend, - "pass_mgr": PassMgrBackend, - "unstable_to_stable": UnstableToStableBackend, - "range_decomposer_validator": RangeDecomposerValidatorBackend, - "graph_variable_renamer_validator": GraphVariableRenamerValidatorBackend, -} - - -def set_seed(random_seed): - random.seed(random_seed) - np.random.seed(random_seed) - torch.manual_seed(random_seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed(random_seed) - torch.cuda.manual_seed_all(random_seed) - - -def get_hardward_name(args): - hardware_name = "unknown" - if "cuda" in args.device: - hardware_name = torch.cuda.get_device_name(args.device) - elif args.device == "cpu": - hardware_name = platform.processor() - return hardware_name - - -def get_compile_framework_version(args): - if args.compiler in ["inductor", "nope", "unstable_to_stable"]: - return torch.__version__ - elif args.compiler in ["tvm", "xla", "tensorrt", "bladedisc"]: - # Assuming compiler object has a version attribute - return f"{args.compiler.capitalize()} {args.compiler.version}" - return "unknown" - - -def load_class_from_file( - args: argparse.Namespace, class_name: str, device: str -) -> Type[torch.nn.Module]: - file_path = f"{args.model_path}/model.py" - file = Path(file_path).resolve() - module_name = file.stem - - with 
open(file_path, "r", encoding="utf-8") as f: - model_code = f.read() - model_code = utils.modify_code_by_device(model_code, device) - spec = importlib.util.spec_from_loader(module_name, loader=None) - module = importlib.util.module_from_spec(spec) - sys.modules[module_name] = module - compiled_code = compile(model_code, filename=file, mode="exec") - exec(compiled_code, module.__dict__) - - model_class = getattr(module, class_name, None) - setattr(model_class, "__graph_net_file_path__", file_path) - setattr(model_class, "__graph_net_device__", device) - return model_class - - def convert_to_dict(config_str): if config_str in {None, "", "null", "None"}: return {} @@ -107,203 +22,6 @@ def convert_to_dict(config_str): return config -def get_compiler_backend(args) -> GraphCompilerBackend: - assert ( - args.compiler in compiler_backend_name2class - ), f"Unknown compiler: {args.compiler}" - backend_class = compiler_backend_name2class[args.compiler] - return backend_class(args.backend_config) - - -def get_model(args): - device = "xla" if args.compiler == "xla" else args.device - - # device: Torch device object specifying the target device for model loading (e.g., 'cuda', 'cpu', 'xla') - model_class = load_class_from_file(args, class_name="GraphModule", device=device) - model = model_class().to(torch.device(args.device)) - return model - - -def get_input_dict(args): - inputs_params = utils.load_converted_from_text(f"{args.model_path}") - params = inputs_params["weight_info"] - for tensor_meta in params.values(): - if "device" in tensor_meta["info"]: - tensor_meta["info"]["device"] = args.device - return { - k: utils.replay_tensor(v).to(torch.device(args.device)) - for k, v in params.items() - } - - -def measure_performance(model_call, args, compiler): - stats = {} - outs = model_call() - - # Warmup runs - for _ in range(args.warmup): - model_call() - compiler.synchronize() - - hardware_name = get_hardward_name(args) - print( - f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {args.trials}", - file=sys.stderr, - flush=True, - ) - - if "cuda" in args.device: - """ - Acknowledgement: We evaluate the performance on both end-to-end and GPU-only timings, - With reference to methods only based on CUDA events from KernelBench in https://github.com/ScalingIntelligence/KernelBench - """ - - e2e_times = [] - gpu_times = [] - - for i in range(args.trials): - # End-to-end timing (naive_timer) - duration_box = test_compiler_util.DurationBox(-1) - with test_compiler_util.naive_timer(duration_box, compiler.synchronize): - # GPU-only timing (CUDA Events) - start_event = torch.cuda.Event(enable_timing=True) - end_event = torch.cuda.Event(enable_timing=True) - start_event.record() - - model_call() - - end_event.record() - compiler.synchronize() - - gpu_time_ms = start_event.elapsed_time(end_event) - e2e_times.append(duration_box.value) - gpu_times.append(gpu_time_ms) - print( - f"Trial {i + 1}: e2e={duration_box.value:.5f} ms, gpu={gpu_time_ms:.5f} ms", - file=sys.stderr, - flush=True, - ) - - stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times) - stats["gpu"] = test_compiler_util.get_timing_stats(gpu_times) - - else: # CPU or other devices - e2e_times = [] - for i in range(args.trials): - duration_box = test_compiler_util.DurationBox(-1) - with test_compiler_util.naive_timer(duration_box, compiler.synchronize): - model_call() - print( - f"Trial {i + 1}: e2e={duration_box.value:.5f} ms", - file=sys.stderr, - flush=True, - ) - e2e_times.append(duration_box.value) - 
stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times) - - return outs, stats - - -def test_single_model(args): - compiler = get_compiler_backend(args) - input_dict = get_input_dict(args) - model = get_model(args) - model_path = os.path.normpath(args.model_path) - test_compiler_util.print_with_log_prompt( - "[Processing]", model_path, args.log_prompt - ) - test_compiler_util.print_basic_config( - args, get_hardward_name(args), get_compile_framework_version(args) - ) - - runtime_seed = 1024 - eager_failure = False - expected_out = None - eager_time_stats = {} - - try: - - def eager_model_call(): - return model(**input_dict) - - expected_out, eager_time_stats = measure_performance( - eager_model_call, args, compiler - ) - - torch.manual_seed(runtime_seed) - if not isinstance(expected_out, tuple): - expected_out = (expected_out,) - except (TypeError, RuntimeError) as e: - print(f"Eager model execution failed: {str(e)}", file=sys.stderr) - eager_failure = True - - compiled_failure = False - compiled_model = None - compiled_time_stats = {} - - try: - compiled_model = compiler(model) - torch.manual_seed(runtime_seed) - - def compiled_model_call(): - return compiled_model(**input_dict) - - compiled_out, compiled_time_stats = measure_performance( - compiled_model_call, args, compiler - ) - - if not isinstance(compiled_out, tuple): - compiled_out = (compiled_out,) - if args.compiler == "xla": - compiled_out = tuple(item.to("cpu").to("cuda") for item in compiled_out) - except (TypeError, RuntimeError) as e: - print(f"Compiled model execution failed: {str(e)}", file=sys.stderr) - compiled_failure = True - print("\n--- Full Traceback ---") - traceback.print_exc() - print(f"debug-model-execution {type(e).__name__} {args.model_path}", flush=True) - except Exception as e: - compiled_failure = True - print("\n--- Full Traceback ---") - traceback.print_exc() - print(f"debug-model-execution {type(e).__name__} {args.model_path}", flush=True) - - if eager_failure: - print(f"{args.log_prompt} [Result] status: failed", file=sys.stderr, flush=True) - print( - f"{args.log_prompt} [Fail due to eager model execution error.]", - file=sys.stderr, - flush=True, - ) - elif compiled_failure: - print(f"{args.log_prompt} [Result] status: failed", file=sys.stderr, flush=True) - print( - f"{args.log_prompt} [Fail due to compiled model execution error.]", - file=sys.stderr, - flush=True, - ) - else: - compare_correctness(expected_out, compiled_out, args) - - print( - f"{args.log_prompt} [Result] status: success", file=sys.stderr, flush=True - ) - - test_compiler_util.print_times_and_speedup( - args, eager_time_stats, compiled_time_stats - ) - - -def print_and_store_cmp(key, cmp_func, args, expected_out, compiled_out, **kwargs): - cmp_ret = cmp_func(expected_out, compiled_out, **kwargs) - print( - f"{args.log_prompt} [Correctness]{key}: {cmp_ret}", - file=sys.stderr, - flush=True, - ) - return cmp_ret - - def compare_correctness(expected_out, compiled_out, args): eager_dtypes = [ ( @@ -386,13 +104,24 @@ def get_cmp_diff_count(expected_out, compiled_out, atol, rtol): return " ".join(results) -def get_sample_root(args): - return args.model_path_prefix +def parse_time_stats_from_reference_log(log_path): + assert os.path.isfile( + log_path + ), f"{log_path} does not exist or is not a regular file." 
+ + with open(log_path, "r", encoding="utf-8") as f: + lines = f.readlines() + for line in reversed(lines): + if "[Performance][eager]" in line: + start = line.find("{") + end = line.rfind("}") + time_stats = json.loads(line[start : end + 1]) + return time_stats -def test_multi_models(args): +def eval_multi_models(args, model_path_prefix): test_samples = test_compiler_util.get_allow_samples( - args.model_path_list, get_sample_root(args) + args.model_path_list, model_path_prefix ) sample_idx = 0 @@ -435,15 +164,15 @@ def test_multi_models(args): print(f"- {model_path}", file=sys.stderr, flush=True) -def test_multi_models_with_prefix(args): - assert os.path.isdir(args.model_path_prefix) +def eval_multi_models_with_prefix(args, model_path_prefix): + assert os.path.isdir(model_path_prefix) assert os.path.isfile(args.model_path_list) test_samples = test_compiler_util.get_allow_samples( - args.model_path_list, get_sample_root(args) + args.model_path_list, model_path_prefix ) py_module_name = os.path.splitext(os.path.basename(__file__))[0] for rel_model_path in test_samples: - model_path = os.path.join(args.model_path_prefix, rel_model_path) + model_path = os.path.join(model_path_prefix, rel_model_path) if not os.path.exists(model_path): continue if not os.path.exists(os.path.join(model_path, "model.py")): @@ -467,39 +196,92 @@ def test_multi_models_with_prefix(args): traceback.print_exc() +def compare_perf_diff(args, model_path, ref_dir, target_dir): + # A + ref_dump_path = utils.get_output_path(ref_dir, model_path) + ref_out = torch.load(str(ref_dump_path)) + + ref_log_path = utils.get_log_path(ref_dir, model_path) + ref_time_stats = parse_time_stats_from_reference_log(ref_log_path) + + # B + target_dump_path = utils.get_output_path(target_dir, model_path) + target_out = torch.load(str(target_dump_path)) + + target_log_path = utils.get_log_path(target_dir, model_path) + target_time_stats = parse_time_stats_from_reference_log(target_log_path) + + compare_correctness(ref_out, target_out, args) + + test_compiler_util.print_times_and_speedup(args, ref_time_stats, target_time_stats) + + +def eval_single_model(args): + ref_dir = "/tmp/eval_perf_diff/A" + target_dir = "/tmp/eval_perf_diff/B" + + EvalCfg = types.SimpleNamespace( + ref_env=types.SimpleNamespace(**convert_to_dict(args.config)["ref_env"]), + target_env=types.SimpleNamespace(**convert_to_dict(args.config)["target_env"]), + ) + + ref_args = build_sub_args(EvalCfg.ref_env) + target_args = build_sub_args(EvalCfg.target_env) + + run_sub_process(ref_args, args.model_path, ref_dir) + run_sub_process(target_args, args.model_path, target_dir) + compare_perf_diff(ref_args, args.model_path, ref_dir, target_dir) + + +def run_sub_process(env_args, model_path, output_path): + cmd = [sys.executable, "-m", "graph_net_bench.torch.eval_backend_perf"] + args_pairs = [ + ("--model-path", model_path), + ("--output-path", output_path), + ("--seed", str(env_args.seed)), + ("--compiler", env_args.compiler), + ("--device", env_args.device), + ("--op-lib", env_args.op_lib), + ("--warmup", str(env_args.warmup)), + ("--trials", str(env_args.trials)), + ("--log-prompt", env_args.log_prompt), + ("--model-path-prefix", env_args.model_path_prefix), + ("--config", env_args.backend_config), + ] + + for arg_name, arg_value in args_pairs: + if arg_value is not None: + cmd.extend([arg_name, arg_value]) + + subprocess.run(cmd, check=True) + + +def build_sub_args(env_ns: types.SimpleNamespace) -> argparse.Namespace: + sub = argparse.Namespace() + sub.seed = getattr(env_ns, 
"seed", 123) + sub.compiler = getattr(env_ns, "compiler", None) + sub.device = getattr(env_ns, "device", None) + sub.op_lib = getattr(env_ns, "op_lib", None) + sub.warmup = getattr(env_ns, "warmup", 3) + sub.trials = getattr(env_ns, "trials", 5) + sub.log_prompt = getattr(env_ns, "log_prompt", None) + sub.model_path_prefix = getattr(env_ns, "model_path_prefix", None) + sub.backend_config = getattr(env_ns, "backend_config", None) + return sub + + def main(args): - if args.model_path_list is not None and args.model_path_prefix is not None: - test_multi_models_with_prefix(args) + config_dict = convert_to_dict(args.config) + model_path_prefix = config_dict["ref_env"]["model_path_prefix"] + if args.model_path_list is not None and model_path_prefix is not None: + eval_multi_models_with_prefix(args, model_path_prefix) return assert os.path.isdir(args.model_path) - initalize_seed = 123 - set_seed(random_seed=initalize_seed) - if path_utils.is_single_model_dir(args.model_path): - test_single_model(args) + eval_single_model(args) else: - test_multi_models(args) - - -def complete_default_args( - mut_args, - compiler: str = "inductor", # Compiler name - device: str = "cuda", # Device for testing the compiler (e.g., 'cpu' or 'cuda') - warmup: int = 3, # Number of warmup steps - trials: int = 5, # Number of timing trials - log_prompt: str = "graph-net-test-compiler-log", # Log prompt for performance log filtering - model_path_prefix: str = None, # Prefix path to model path in --model-path-list - backend_config: dict = None, # backend configuration json -): - backend_config = backend_config if backend_config is not None else {} - mut_args.compiler = compiler - mut_args.device = device - mut_args.warmup = warmup - mut_args.trials = trials - mut_args.log_prompt = log_prompt - mut_args.model_path_prefix = model_path_prefix - mut_args.backend_config = backend_config + eval_multi_models(args, model_path_prefix) if __name__ == "__main__": @@ -526,5 +308,4 @@ def complete_default_args( help="base64 encode configuration json.", ) args = parser.parse_args() - complete_default_args(args, **convert_to_dict(args.config)) main(args=args) diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py new file mode 100644 index 000000000..7e12f6ebf --- /dev/null +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -0,0 +1,337 @@ +from . 
import utils +import argparse +import importlib.util +import torch +from pathlib import Path +from typing import Type +import sys +import os +import traceback +import json +import random +import numpy as np +import platform +import base64 +from contextlib import redirect_stdout, redirect_stderr + +from graph_net_bench.torch.backend.graph_compiler_backend import GraphCompilerBackend +from graph_net_bench.torch.backend.tvm_backend import TvmBackend +from graph_net_bench.torch.backend.xla_backend import XlaBackend +from graph_net_bench.torch.backend.inductor_backend import InductorBackend +from graph_net_bench.torch.backend.tensorrt_backend import TensorRTBackend +from graph_net_bench.torch.backend.blade_disc_backend import BladeDISCBackend +from graph_net_bench.torch.backend.nope_backend import NopeBackend +from graph_net_bench.torch.backend.pass_mgr_backend import PassMgrBackend +from graph_net_bench.torch.backend.unstable_to_stable_backend import ( + UnstableToStableBackend, +) +from graph_net_bench.torch.backend.range_decomposer_validator_backend import ( + RangeDecomposerValidatorBackend, +) +from graph_net_bench.torch.backend.graph_variable_renamer_validator_backend import ( + GraphVariableRenamerValidatorBackend, +) +from graph_net_bench import test_compiler_util + + +compiler_backend_name2class = { + "tvm": TvmBackend, + "xla": XlaBackend, + "inductor": InductorBackend, + "tensorrt": TensorRTBackend, + "bladedisc": BladeDISCBackend, + "nope": NopeBackend, + "pass_mgr": PassMgrBackend, + "unstable_to_stable": UnstableToStableBackend, + "range_decomposer_validator": RangeDecomposerValidatorBackend, + "graph_variable_renamer_validator": GraphVariableRenamerValidatorBackend, +} + + +def register_op_lib(op_lib): + if op_lib == "flaggems": + import flag_gems + + flag_gems.enable() + else: + pass + + +def set_seed(random_seed): + random.seed(random_seed) + np.random.seed(random_seed) + torch.manual_seed(random_seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(random_seed) + torch.cuda.manual_seed_all(random_seed) + + +def get_hardward_name(args): + hardware_name = "unknown" + if "cuda" in args.device: + hardware_name = torch.cuda.get_device_name(args.device) + elif args.device == "cpu": + hardware_name = platform.processor() + return hardware_name + + +def get_compile_framework_version(args): + if args.compiler in ["inductor", "nope", "unstable_to_stable"]: + return torch.__version__ + elif args.compiler in ["tvm", "xla", "tensorrt", "bladedisc"]: + # Assuming compiler object has a version attribute + return f"{args.compiler.capitalize()} {args.compiler.version}" + return "unknown" + + +def load_class_from_file( + args: argparse.Namespace, class_name: str, device: str +) -> Type[torch.nn.Module]: + file_path = f"{args.model_path}/model.py" + file = Path(file_path).resolve() + module_name = file.stem + + with open(file_path, "r", encoding="utf-8") as f: + model_code = f.read() + model_code = utils.modify_code_by_device(model_code, device) + spec = importlib.util.spec_from_loader(module_name, loader=None) + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + compiled_code = compile(model_code, filename=file, mode="exec") + exec(compiled_code, module.__dict__) + + model_class = getattr(module, class_name, None) + setattr(model_class, "__graph_net_file_path__", file_path) + setattr(model_class, "__graph_net_device__", device) + return model_class + + +def convert_to_dict(config_str): + if config_str is None or config_str == "None": + return {} + 
config_str = base64.b64decode(config_str).decode("utf-8") + config = json.loads(config_str) + assert isinstance(config, dict), f"config should be a dict. {config_str=}" + return config + + +def get_compiler_backend(args) -> GraphCompilerBackend: + assert ( + args.compiler in compiler_backend_name2class + ), f"Unknown compiler: {args.compiler}" + backend_class = compiler_backend_name2class[args.compiler] + config = convert_to_dict(args.config) if args.config is not None else {} + return backend_class(config) + + +def get_model(args): + device = "xla" if args.compiler == "xla" else args.device + + # device: Torch device object specifying the target device for model loading (e.g., 'cuda', 'cpu', 'xla') + model_class = load_class_from_file(args, class_name="GraphModule", device=device) + model = model_class().to(torch.device(args.device)) + return model + + +def get_input_dict(args): + inputs_params = utils.load_converted_from_text(f"{args.model_path}") + params = inputs_params["weight_info"] + for tensor_meta in params.values(): + if "device" in tensor_meta["info"]: + tensor_meta["info"]["device"] = args.device + return { + k: utils.replay_tensor(v).to(torch.device(args.device)) + for k, v in params.items() + } + + +def measure_performance(model_call, args, compiler): + stats = {} + outs = model_call() + + # Warmup runs + for _ in range(args.warmup): + model_call() + compiler.synchronize() + + hardware_name = get_hardward_name(args) + print( + f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {args.trials}", + file=sys.stderr, + flush=True, + ) + + if "cuda" in args.device: + torch.cuda.empty_cache() + e2e_times = [] + gpu_times = [] + + for i in range(args.trials): + # End-to-end timing (naive_timer) + duration_box = test_compiler_util.DurationBox(-1) + with test_compiler_util.naive_timer(duration_box, compiler.synchronize): + # GPU-only timing (CUDA Events) + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + start_event.record() + + model_call() + + end_event.record() + compiler.synchronize() + + gpu_time_ms = start_event.elapsed_time(end_event) + e2e_times.append(duration_box.value) + gpu_times.append(gpu_time_ms) + print( + f"Trial {i + 1}: e2e={duration_box.value:.5f} ms, gpu={gpu_time_ms:.5f} ms", + file=sys.stderr, + flush=True, + ) + + stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times) + stats["gpu"] = test_compiler_util.get_timing_stats(gpu_times) + + else: # CPU or other devices + e2e_times = [] + for i in range(args.trials): + duration_box = test_compiler_util.DurationBox(-1) + with test_compiler_util.naive_timer(duration_box, compiler.synchronize): + model_call() + print( + f"Trial {i + 1}: e2e={duration_box.value:.5f} ms", + file=sys.stderr, + flush=True, + ) + e2e_times.append(duration_box.value) + stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times) + + return outs, stats + + +def eval_single_model(args): + log_path = utils.get_log_path(args.output_path, args.model_path) + output_dump_path = utils.get_output_path(args.output_path, args.model_path) + print(f"Log path: {log_path}", file=sys.stderr, flush=True) + print(f"Outputs path: {output_dump_path}", file=sys.stderr, flush=True) + + with open(log_path, "w", encoding="utf-8") as log_f: + with redirect_stdout(log_f), redirect_stderr(log_f): + compiler = get_compiler_backend(args) + + input_dict = get_input_dict(args) + model = get_model(args) + model.eval() + + test_compiler_util.print_with_log_prompt( + "[Config] 
seed:", args.seed, args.log_prompt + ) + + test_compiler_util.print_basic_config( + args, + get_hardward_name(args), + get_compile_framework_version(args), + ) + + test_compiler_util.print_with_log_prompt( + "[Config] op_lib:", args.op_lib, args.log_prompt + ) + + success = False + time_stats = {} + try: + compiled_model = compiler(model) + + def model_call(): + return compiled_model(**input_dict) + + outputs, time_stats = measure_performance(model_call, args, compiler) + success = True + except Exception as e: + print( + f"Run model failed: {str(e)}\n{traceback.format_exc()}", + file=sys.stderr, + flush=True, + ) + + test_compiler_util.print_running_status(args, success) + if success: + torch.save(outputs, str(output_dump_path)) + test_compiler_util.print_with_log_prompt( + "[Performance][eager]:", json.dumps(time_stats), args.log_prompt + ) + + with open(log_path, "r", encoding="utf-8") as f: + content = f.read() + print(content, file=sys.stderr, flush=True) + + +def main(args): + set_seed(args.seed) + os.makedirs(args.output_path, exist_ok=True) + eval_single_model(args) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="GraphNet Backend Performance Evaluation" + ) + parser.add_argument( + "--model-path", + type=str, + required=False, + default=None, + help="Path to model file(s), each subdirectory containing graph_net.json will be regarded as a model", + ) + parser.add_argument( + "--output-path", + type=str, + required=False, + default="/tmp/test_save", + help="Path to save outputs", + ) + parser.add_argument("--seed", type=int, required=False, default=123) + parser.add_argument( + "--compiler", + type=str, + required=False, + default="inductor", + help="Path to customized compiler python file", + ) + parser.add_argument( + "--device", + type=str, + required=False, + default="cuda", + help="Device for testing the compiler (e.g., 'cpu' or 'cuda')", + ) + parser.add_argument("--op-lib", type=str, required=False, default=None) + parser.add_argument( + "--warmup", type=int, required=False, default=3, help="Number of warmup steps" + ) + parser.add_argument( + "--trials", type=int, required=False, default=5, help="Number of timing trials" + ) + parser.add_argument( + "--log-prompt", + type=str, + required=False, + default="graph-net-test-compiler-log", + help="Log prompt for performance log filtering.", + ) + parser.add_argument( + "--model-path-prefix", + type=str, + required=False, + default=None, + help="Prefix path to model path list", + ) + parser.add_argument( + "--config", + type=str, + required=False, + default=None, + help="base64 encode configuration json.", + ) + args = parser.parse_args() + main(args=args) diff --git a/graph_net_bench/torch/utils.py b/graph_net_bench/torch/utils.py index c937ff4de..700a59972 100755 --- a/graph_net_bench/torch/utils.py +++ b/graph_net_bench/torch/utils.py @@ -1,4 +1,5 @@ import torch +import os import ast import math import inspect @@ -7,6 +8,16 @@ kLiteralTensorSize = 64 +def get_log_path(log_dir, model_path): + model_name = model_path.split("torch_samples/")[-1].replace(os.sep, "_") + return os.path.join(log_dir, f"{model_name}.log") + + +def get_output_path(output_dir, model_path): + model_name = model_path.split("torch_samples/")[-1].replace(os.sep, "_") + return os.path.join(output_dir, f"{model_name}.pth") + + def get_limited_precision_float_str(value): if not isinstance(value, float): return value diff --git a/test/eval_backend_diff_test.sh b/test/eval_backend_diff_test.sh index e3fa79602..16da81903 100755 
--- a/test/eval_backend_diff_test.sh +++ b/test/eval_backend_diff_test.sh @@ -10,9 +10,16 @@ python3 -m graph_net_bench.torch.eval_backend_diff \ --model-path-list $model_list \ --config $(base64 -w 0 <&1 | tee "$OUTPUT_PATH/validation.log" From f83ab0cfd1bfdb325c356a0604e9cb717e45abde Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 08:56:00 +0800 Subject: [PATCH 02/20] Simplify eval_multi_models --- graph_net_bench/torch/eval_backend_diff.py | 123 ++++++++++----------- 1 file changed, 57 insertions(+), 66 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index 07a19ff88..50d17cb62 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -119,81 +119,69 @@ def parse_time_stats_from_reference_log(log_path): return time_stats -def eval_multi_models(args, model_path_prefix): - test_samples = test_compiler_util.get_allow_samples( - args.model_path_list, model_path_prefix - ) - +def eval_multi_models(args, model_path_prefix=None, use_model_list=False): sample_idx = 0 failed_samples = [] module_name = os.path.splitext(os.path.basename(__file__))[0] - for model_path in path_utils.get_recursively_model_path(args.model_path): - if test_samples is None or os.path.abspath(model_path) in test_samples: - print( - f"[{sample_idx}] {module_name}, model_path: {model_path}", - file=sys.stderr, - flush=True, - ) - cmd = " ".join( - [ - sys.executable, - f"-m graph_net_bench.torch.{module_name}", - f"--model-path {model_path}", - f"--config {args.config}", - ] - ) - try: - process = subprocess.Popen(cmd, shell=True) - cmd_ret = process.wait() - except KeyboardInterrupt: - print("KeyboardInterrupt") - sys.exit(1) - except Exception: - print("\n--- Full Traceback ---") - traceback.print_exc() - if cmd_ret != 0: - failed_samples.append(model_path) - sample_idx += 1 - - print( - f"Totally {sample_idx} verified samples, failed {len(failed_samples)} samples.", - file=sys.stderr, - flush=True, - ) - for model_path in failed_samples: - print(f"- {model_path}", file=sys.stderr, flush=True) - -def eval_multi_models_with_prefix(args, model_path_prefix): - assert os.path.isdir(model_path_prefix) - assert os.path.isfile(args.model_path_list) - test_samples = test_compiler_util.get_allow_samples( - args.model_path_list, model_path_prefix - ) - py_module_name = os.path.splitext(os.path.basename(__file__))[0] - for rel_model_path in test_samples: - model_path = os.path.join(model_path_prefix, rel_model_path) - if not os.path.exists(model_path): - continue - if not os.path.exists(os.path.join(model_path, "model.py")): - continue + if use_model_list: + assert os.path.isdir(model_path_prefix) + assert os.path.isfile(args.model_path_list) + test_samples = test_compiler_util.get_allow_samples( + args.model_path_list, model_path_prefix + ) + model_paths = [] + for rel_model_path in test_samples: + model_path = os.path.join(model_path_prefix, rel_model_path) + if os.path.exists(model_path) and os.path.exists( + os.path.join(model_path, "model.py") + ): + model_paths.append(model_path) + else: + assert os.path.isdir(args.model_path) + test_samples = test_compiler_util.get_allow_samples( + args.model_path_list, model_path_prefix + ) + model_paths = [] + for model_path in path_utils.get_recursively_model_path(args.model_path): + if test_samples is None or os.path.abspath(model_path) in test_samples: + model_paths.append(model_path) + + for model_path in model_paths: + print( + 
f"[{sample_idx}] {module_name}, model_path: {model_path}", + file=sys.stderr, + flush=True, + ) cmd = " ".join( [ sys.executable, - f"-m graph_net_bench.torch.{py_module_name}", + f"-m graph_net_bench.torch.{module_name}", f"--model-path {model_path}", f"--config {args.config}", ] ) try: process = subprocess.Popen(cmd, shell=True) - process.wait() + cmd_ret = process.wait() except KeyboardInterrupt: print("KeyboardInterrupt") sys.exit(1) except Exception: print("\n--- Full Traceback ---") traceback.print_exc() + if cmd_ret != 0: + failed_samples.append(model_path) + sample_idx += 1 + + print( + f"Totally {sample_idx} verified samples, failed {len(failed_samples)} samples.", + file=sys.stderr, + flush=True, + ) + if failed_samples: + for model_path in failed_samples: + print(f"- {model_path}", file=sys.stderr, flush=True) def compare_perf_diff(args, model_path, ref_dir, target_dir): @@ -272,20 +260,23 @@ def build_sub_args(env_ns: types.SimpleNamespace) -> argparse.Namespace: def main(args): config_dict = convert_to_dict(args.config) - model_path_prefix = config_dict["ref_env"]["model_path_prefix"] - if args.model_path_list is not None and model_path_prefix is not None: - eval_multi_models_with_prefix(args, model_path_prefix) - return - assert os.path.isdir(args.model_path) - - if path_utils.is_single_model_dir(args.model_path): - eval_single_model(args) + model_path_prefix = config_dict.get("ref_env", {}).get("model_path_prefix") + + if args.model_path_list and model_path_prefix: + eval_multi_models(args, model_path_prefix, use_model_list=True) + elif os.path.isdir(args.model_path): + if path_utils.is_single_model_dir(args.model_path): + eval_single_model(args) + else: + eval_multi_models(args, model_path_prefix, use_model_list=False) else: - eval_multi_models(args, model_path_prefix) + raise ValueError(f"Invalid model path: {args.model_path}") if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Test compiler performance.") + parser = argparse.ArgumentParser( + description="Evaluate backend performance difference." + ) parser.add_argument( "--model-path", type=str, From 9670c7a6787b7d8b03cee5e5232383587ed839ea Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 08:59:30 +0800 Subject: [PATCH 03/20] minor change --- graph_net_bench/torch/eval_backend_diff.py | 4 ++-- graph_net_bench/torch/eval_backend_perf.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index 50d17cb62..a5c02ec7b 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -252,7 +252,7 @@ def build_sub_args(env_ns: types.SimpleNamespace) -> argparse.Namespace: sub.op_lib = getattr(env_ns, "op_lib", None) sub.warmup = getattr(env_ns, "warmup", 3) sub.trials = getattr(env_ns, "trials", 5) - sub.log_prompt = getattr(env_ns, "log_prompt", None) + sub.log_prompt = getattr(env_ns, "log_prompt", "graph-net-bench-log") sub.model_path_prefix = getattr(env_ns, "model_path_prefix", None) sub.backend_config = getattr(env_ns, "backend_config", None) return sub @@ -275,7 +275,7 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser( - description="Evaluate backend performance difference." + description="Evaluate Backend Performance Difference." 
) parser.add_argument( "--model-path", diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index 7e12f6ebf..60194ae88 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -274,7 +274,7 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser( - description="GraphNet Backend Performance Evaluation" + description="Single Backend Performance Evaluation" ) parser.add_argument( "--model-path", @@ -316,7 +316,7 @@ def main(args): "--log-prompt", type=str, required=False, - default="graph-net-test-compiler-log", + default="graph-net-bench-log", help="Log prompt for performance log filtering.", ) parser.add_argument( From cb9a4f1e3cb7aff5a68b9f34083ba913faeb276f Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 09:06:35 +0800 Subject: [PATCH 04/20] Minor change on names --- graph_net_bench/torch/eval_backend_perf.py | 28 ++++++++++++---------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index 60194ae88..c550767f0 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -66,28 +66,28 @@ def set_seed(random_seed): torch.cuda.manual_seed_all(random_seed) -def get_hardward_name(args): +def get_hardward_name(device): hardware_name = "unknown" - if "cuda" in args.device: - hardware_name = torch.cuda.get_device_name(args.device) + if "cuda" in device: + hardware_name = torch.cuda.get_device_name(device) elif args.device == "cpu": hardware_name = platform.processor() return hardware_name -def get_compile_framework_version(args): - if args.compiler in ["inductor", "nope", "unstable_to_stable"]: +def get_compiler_version(compiler): + if compiler in ["inductor", "nope", "unstable_to_stable"]: return torch.__version__ - elif args.compiler in ["tvm", "xla", "tensorrt", "bladedisc"]: + elif compiler in ["tvm", "xla", "tensorrt", "bladedisc"]: # Assuming compiler object has a version attribute - return f"{args.compiler.capitalize()} {args.compiler.version}" + return f"{compiler.capitalize()} {compiler.version}" return "unknown" def load_class_from_file( - args: argparse.Namespace, class_name: str, device: str + model_path: str, class_name: str, device: str ) -> Type[torch.nn.Module]: - file_path = f"{args.model_path}/model.py" + file_path = f"{model_path}/model.py" file = Path(file_path).resolve() module_name = file.stem @@ -128,7 +128,9 @@ def get_model(args): device = "xla" if args.compiler == "xla" else args.device # device: Torch device object specifying the target device for model loading (e.g., 'cuda', 'cpu', 'xla') - model_class = load_class_from_file(args, class_name="GraphModule", device=device) + model_class = load_class_from_file( + args.model_path, class_name="GraphModule", device=device + ) model = model_class().to(torch.device(args.device)) return model @@ -154,7 +156,7 @@ def measure_performance(model_call, args, compiler): model_call() compiler.synchronize() - hardware_name = get_hardward_name(args) + hardware_name = get_hardward_name(args.device) print( f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {args.trials}", file=sys.stderr, @@ -229,8 +231,8 @@ def eval_single_model(args): test_compiler_util.print_basic_config( args, - get_hardward_name(args), - get_compile_framework_version(args), + get_hardward_name(args.device), + 
get_compiler_version(args.compiler), ) test_compiler_util.print_with_log_prompt( From 6b0975da11997a52ee4f8daf11215f6dd3b71564 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 14:41:35 +0800 Subject: [PATCH 05/20] use call method instead of bash --- graph_net_bench/torch/eval_backend_diff.py | 75 ++++++++++------------ graph_net_bench/torch/eval_backend_perf.py | 4 +- 2 files changed, 35 insertions(+), 44 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index a5c02ec7b..bb7811689 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -1,5 +1,4 @@ from . import utils -import subprocess import argparse import torch import sys @@ -11,6 +10,7 @@ import types from graph_net_bench import test_compiler_util from graph_net_bench import path_utils +from .eval_backend_perf import eval_single_model_with_single_backend def convert_to_dict(config_str): @@ -153,23 +153,33 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False): file=sys.stderr, flush=True, ) - cmd = " ".join( - [ - sys.executable, - f"-m graph_net_bench.torch.{module_name}", - f"--model-path {model_path}", - f"--config {args.config}", - ] - ) + try: - process = subprocess.Popen(cmd, shell=True) - cmd_ret = process.wait() + single_model_args = argparse.Namespace() + + single_model_args.model_path = model_path + single_model_args.config = args.config + single_model_args.model_path_list = None + + if path_utils.is_single_model_dir(model_path): + eval_single_model(single_model_args) + else: + submodel_paths = path_utils.get_recursively_model_path(model_path) + for submodel_path in submodel_paths: + sub_args = argparse.Namespace() + sub_args.model_path = submodel_path + sub_args.config = args.config + sub_args.model_path_list = None + eval_single_model(sub_args) + cmd_ret = 0 except KeyboardInterrupt: print("KeyboardInterrupt") sys.exit(1) except Exception: print("\n--- Full Traceback ---") traceback.print_exc() + cmd_ret = 1 + if cmd_ret != 0: failed_samples.append(model_path) sample_idx += 1 @@ -213,48 +223,29 @@ def eval_single_model(args): target_env=types.SimpleNamespace(**convert_to_dict(args.config)["target_env"]), ) - ref_args = build_sub_args(EvalCfg.ref_env) - target_args = build_sub_args(EvalCfg.target_env) + ref_args = build_sub_args(EvalCfg.ref_env, args.model_path, ref_dir) + target_args = build_sub_args(EvalCfg.target_env, args.model_path, target_dir) - run_sub_process(ref_args, args.model_path, ref_dir) - run_sub_process(target_args, args.model_path, target_dir) + eval_single_model_with_single_backend(ref_args) + eval_single_model_with_single_backend(target_args) compare_perf_diff(ref_args, args.model_path, ref_dir, target_dir) -def run_sub_process(env_args, model_path, output_path): - cmd = [sys.executable, "-m", "graph_net_bench.torch.eval_backend_perf"] - args_pairs = [ - ("--model-path", model_path), - ("--output-path", output_path), - ("--seed", str(env_args.seed)), - ("--compiler", env_args.compiler), - ("--device", env_args.device), - ("--op-lib", env_args.op_lib), - ("--warmup", str(env_args.warmup)), - ("--trials", str(env_args.trials)), - ("--log-prompt", env_args.log_prompt), - ("--model-path-prefix", env_args.model_path_prefix), - ("--config", env_args.backend_config), - ] - - for arg_name, arg_value in args_pairs: - if arg_value is not None: - cmd.extend([arg_name, arg_value]) - - subprocess.run(cmd, check=True) - - -def 
build_sub_args(env_ns: types.SimpleNamespace) -> argparse.Namespace: +def build_sub_args( + env_ns: types.SimpleNamespace, model_path: str, output_path: str +) -> argparse.Namespace: sub = argparse.Namespace() + sub.model_path = model_path + sub.output_path = output_path sub.seed = getattr(env_ns, "seed", 123) - sub.compiler = getattr(env_ns, "compiler", None) - sub.device = getattr(env_ns, "device", None) + sub.compiler = getattr(env_ns, "compiler", "inductor") + sub.device = getattr(env_ns, "device", "cuda") sub.op_lib = getattr(env_ns, "op_lib", None) sub.warmup = getattr(env_ns, "warmup", 3) sub.trials = getattr(env_ns, "trials", 5) sub.log_prompt = getattr(env_ns, "log_prompt", "graph-net-bench-log") sub.model_path_prefix = getattr(env_ns, "model_path_prefix", None) - sub.backend_config = getattr(env_ns, "backend_config", None) + sub.config = getattr(env_ns, "backend_config", None) return sub diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index c550767f0..d099ac7d9 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -211,7 +211,7 @@ def measure_performance(model_call, args, compiler): return outs, stats -def eval_single_model(args): +def eval_single_model_with_single_backend(args): log_path = utils.get_log_path(args.output_path, args.model_path) output_dump_path = utils.get_output_path(args.output_path, args.model_path) print(f"Log path: {log_path}", file=sys.stderr, flush=True) @@ -271,7 +271,7 @@ def model_call(): def main(args): set_seed(args.seed) os.makedirs(args.output_path, exist_ok=True) - eval_single_model(args) + eval_single_model_with_single_backend(args) if __name__ == "__main__": From 980f7377973080b6bfd89250e7129cf3647a638a Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 14:46:21 +0800 Subject: [PATCH 06/20] minor change --- graph_net_bench/torch/eval_backend_perf.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index d099ac7d9..fcf313cef 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -212,6 +212,8 @@ def measure_performance(model_call, args, compiler): def eval_single_model_with_single_backend(args): + set_seed(args.seed) + os.makedirs(args.output_path, exist_ok=True) log_path = utils.get_log_path(args.output_path, args.model_path) output_dump_path = utils.get_output_path(args.output_path, args.model_path) print(f"Log path: {log_path}", file=sys.stderr, flush=True) @@ -268,12 +270,6 @@ def model_call(): print(content, file=sys.stderr, flush=True) -def main(args): - set_seed(args.seed) - os.makedirs(args.output_path, exist_ok=True) - eval_single_model_with_single_backend(args) - - if __name__ == "__main__": parser = argparse.ArgumentParser( description="Single Backend Performance Evaluation" @@ -336,4 +332,4 @@ def main(args): help="base64 encode configuration json.", ) args = parser.parse_args() - main(args=args) + eval_single_model_with_single_backend(args=args) From 5c49521d22fb87d4cf3f25549dd5ee2b76652262 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 14:55:08 +0800 Subject: [PATCH 07/20] change some names --- graph_net_bench/torch/eval_backend_diff.py | 16 ++++++++++------ graph_net_bench/torch/eval_backend_perf.py | 8 +++++--- test/eval_backend_diff_test.sh | 8 ++++++-- 3 files changed, 21 insertions(+), 11 
deletions(-) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index bb7811689..0e7229086 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -219,12 +219,16 @@ def eval_single_model(args): target_dir = "/tmp/eval_perf_diff/B" EvalCfg = types.SimpleNamespace( - ref_env=types.SimpleNamespace(**convert_to_dict(args.config)["ref_env"]), - target_env=types.SimpleNamespace(**convert_to_dict(args.config)["target_env"]), + reference_config=types.SimpleNamespace( + **convert_to_dict(args.config)["reference_config"] + ), + target_config=types.SimpleNamespace( + **convert_to_dict(args.config)["target_config"] + ), ) - ref_args = build_sub_args(EvalCfg.ref_env, args.model_path, ref_dir) - target_args = build_sub_args(EvalCfg.target_env, args.model_path, target_dir) + ref_args = build_sub_args(EvalCfg.reference_config, args.model_path, ref_dir) + target_args = build_sub_args(EvalCfg.target_config, args.model_path, target_dir) eval_single_model_with_single_backend(ref_args) eval_single_model_with_single_backend(target_args) @@ -245,13 +249,13 @@ def build_sub_args( sub.trials = getattr(env_ns, "trials", 5) sub.log_prompt = getattr(env_ns, "log_prompt", "graph-net-bench-log") sub.model_path_prefix = getattr(env_ns, "model_path_prefix", None) - sub.config = getattr(env_ns, "backend_config", None) + sub.backend_config = getattr(env_ns, "backend_config", None) return sub def main(args): config_dict = convert_to_dict(args.config) - model_path_prefix = config_dict.get("ref_env", {}).get("model_path_prefix") + model_path_prefix = config_dict.get("reference_config", {}).get("model_path_prefix") if args.model_path_list and model_path_prefix: eval_multi_models(args, model_path_prefix, use_model_list=True) diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index fcf313cef..29c40d3fb 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -120,8 +120,10 @@ def get_compiler_backend(args) -> GraphCompilerBackend: args.compiler in compiler_backend_name2class ), f"Unknown compiler: {args.compiler}" backend_class = compiler_backend_name2class[args.compiler] - config = convert_to_dict(args.config) if args.config is not None else {} - return backend_class(config) + backend_config = ( + convert_to_dict(args.backend_config) if args.backend_config is not None else {} + ) + return backend_class(backend_config) def get_model(args): @@ -325,7 +327,7 @@ def model_call(): help="Prefix path to model path list", ) parser.add_argument( - "--config", + "--backend-config", type=str, required=False, default=None, diff --git a/test/eval_backend_diff_test.sh b/test/eval_backend_diff_test.sh index 16da81903..17bba712e 100755 --- a/test/eval_backend_diff_test.sh +++ b/test/eval_backend_diff_test.sh @@ -10,14 +10,18 @@ python3 -m graph_net_bench.torch.eval_backend_diff \ --model-path-list $model_list \ --config $(base64 -w 0 < Date: Fri, 16 Jan 2026 15:51:53 +0800 Subject: [PATCH 08/20] Dynamically load backend class based on args.compiler --- graph_net_bench/torch/eval_backend_perf.py | 61 +++++++++------------- 1 file changed, 24 insertions(+), 37 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index 29c40d3fb..4d5ea94a5 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -13,41 +13,10 @@ import platform import 
base64 from contextlib import redirect_stdout, redirect_stderr - from graph_net_bench.torch.backend.graph_compiler_backend import GraphCompilerBackend -from graph_net_bench.torch.backend.tvm_backend import TvmBackend -from graph_net_bench.torch.backend.xla_backend import XlaBackend -from graph_net_bench.torch.backend.inductor_backend import InductorBackend -from graph_net_bench.torch.backend.tensorrt_backend import TensorRTBackend -from graph_net_bench.torch.backend.blade_disc_backend import BladeDISCBackend -from graph_net_bench.torch.backend.nope_backend import NopeBackend -from graph_net_bench.torch.backend.pass_mgr_backend import PassMgrBackend -from graph_net_bench.torch.backend.unstable_to_stable_backend import ( - UnstableToStableBackend, -) -from graph_net_bench.torch.backend.range_decomposer_validator_backend import ( - RangeDecomposerValidatorBackend, -) -from graph_net_bench.torch.backend.graph_variable_renamer_validator_backend import ( - GraphVariableRenamerValidatorBackend, -) from graph_net_bench import test_compiler_util -compiler_backend_name2class = { - "tvm": TvmBackend, - "xla": XlaBackend, - "inductor": InductorBackend, - "tensorrt": TensorRTBackend, - "bladedisc": BladeDISCBackend, - "nope": NopeBackend, - "pass_mgr": PassMgrBackend, - "unstable_to_stable": UnstableToStableBackend, - "range_decomposer_validator": RangeDecomposerValidatorBackend, - "graph_variable_renamer_validator": GraphVariableRenamerValidatorBackend, -} - - def register_op_lib(op_lib): if op_lib == "flaggems": import flag_gems @@ -70,7 +39,7 @@ def get_hardward_name(device): hardware_name = "unknown" if "cuda" in device: hardware_name = torch.cuda.get_device_name(device) - elif args.device == "cpu": + elif device == "cpu": hardware_name = platform.processor() return hardware_name @@ -116,10 +85,28 @@ def convert_to_dict(config_str): def get_compiler_backend(args) -> GraphCompilerBackend: - assert ( - args.compiler in compiler_backend_name2class - ), f"Unknown compiler: {args.compiler}" - backend_class = compiler_backend_name2class[args.compiler] + """ + Dynamically load backend class based on args.compiler + """ + compiler_name = args.compiler.lower() + module_name = f"graph_net_bench.torch.backend.{compiler_name}_backend" + + try: + module = __import__(module_name, fromlist=[f"{compiler_name.title()}Backend"]) + + class_name = ( + f"{''.join(part.title() for part in compiler_name.split('_'))}Backend" + ) + + backend_class = None + if hasattr(module, class_name): + backend_class = getattr(module, class_name) + else: + raise ImportError(f"No valid backend class found in {module_name}") + + except ImportError as e: + raise ImportError(f"Failed to import backend module for '{compiler_name}': {e}") + backend_config = ( convert_to_dict(args.backend_config) if args.backend_config is not None else {} ) @@ -327,7 +314,7 @@ def model_call(): help="Prefix path to model path list", ) parser.add_argument( - "--backend-config", + "--config", type=str, required=False, default=None, From 8c2b1c3799020940836c34ace41350557b1e7ac8 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 16:28:01 +0800 Subject: [PATCH 09/20] Change argument passing to json config --- graph_net_bench/test_compiler_util.py | 28 +++++ graph_net_bench/torch/eval_backend_diff.py | 50 +++----- graph_net_bench/torch/eval_backend_perf.py | 131 +++++++-------------- 3 files changed, 88 insertions(+), 121 deletions(-) diff --git a/graph_net_bench/test_compiler_util.py b/graph_net_bench/test_compiler_util.py index 
f587da2ff..de38a29fa 100644 --- a/graph_net_bench/test_compiler_util.py +++ b/graph_net_bench/test_compiler_util.py @@ -5,6 +5,7 @@ import time import subprocess import shutil +import base64 import numpy as np from dataclasses import dataclass from contextlib import contextmanager @@ -156,6 +157,24 @@ def print_basic_config(args, hardware_name, compile_framework_version): ) +def print_config(model_path, config, hardware_name, compiler_version): + model_path = os.path.normpath(model_path) + model_name = get_model_name(model_path) + print_with_log_prompt("[Config] model:", model_name, config.log_prompt) + print_with_log_prompt("[Config] seed:", config.seed, config.log_prompt) + print_with_log_prompt("[Config] device:", config.device, config.log_prompt) + print_with_log_prompt("[Config] hardware:", hardware_name, config.log_prompt) + print_with_log_prompt("[Config] op_lib:", config.op_lib, config.log_prompt) + print_with_log_prompt("[Config] compiler:", config.compiler, config.log_prompt) + print_with_log_prompt("[Config] warmup:", config.warmup, config.log_prompt) + print_with_log_prompt("[Config] trials:", config.trials, config.log_prompt) + print_with_log_prompt( + "[Config] compile_framework_version:", + compiler_version, + config.log_prompt, + ) + + def print_running_status(args, eager_success, compiled_success=None): def convert_to_str(b): return "success" if b else "failed" @@ -353,3 +372,12 @@ def get_allow_samples(allow_list, model_path_prefix): test_samples.append(os.path.join(model_path_prefix, line.strip())) return test_samples + + +def convert_to_dict(config_str): + if config_str in {None, "", "null", "None"}: + return {} + config_str = base64.b64decode(config_str).decode("utf-8") + config = json.loads(config_str) + assert isinstance(config, dict), f"config should be a dict. {config_str=}" + return config diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index 0e7229086..6f8dc550b 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -6,22 +6,12 @@ import os.path import traceback import json -import base64 import types from graph_net_bench import test_compiler_util from graph_net_bench import path_utils from .eval_backend_perf import eval_single_model_with_single_backend -def convert_to_dict(config_str): - if config_str in {None, "", "null", "None"}: - return {} - config_str = base64.b64decode(config_str).decode("utf-8") - config = json.loads(config_str) - assert isinstance(config, dict), f"config should be a dict. 
{config_str=}" - return config - - def compare_correctness(expected_out, compiled_out, args): eager_dtypes = [ ( @@ -220,41 +210,37 @@ def eval_single_model(args): EvalCfg = types.SimpleNamespace( reference_config=types.SimpleNamespace( - **convert_to_dict(args.config)["reference_config"] + **test_compiler_util.convert_to_dict(args.config)["reference_config"] ), target_config=types.SimpleNamespace( - **convert_to_dict(args.config)["target_config"] + **test_compiler_util.convert_to_dict(args.config)["target_config"] ), ) - ref_args = build_sub_args(EvalCfg.reference_config, args.model_path, ref_dir) - target_args = build_sub_args(EvalCfg.target_config, args.model_path, target_dir) + reference_config = build_sub_config(EvalCfg.reference_config) + target_config = build_sub_config(EvalCfg.target_config) - eval_single_model_with_single_backend(ref_args) - eval_single_model_with_single_backend(target_args) - compare_perf_diff(ref_args, args.model_path, ref_dir, target_dir) + eval_single_model_with_single_backend(args.model_path, ref_dir, reference_config) + eval_single_model_with_single_backend(args.model_path, target_dir, target_config) + compare_perf_diff(reference_config, args.model_path, ref_dir, target_dir) -def build_sub_args( - env_ns: types.SimpleNamespace, model_path: str, output_path: str -) -> argparse.Namespace: +def build_sub_config(config): sub = argparse.Namespace() - sub.model_path = model_path - sub.output_path = output_path - sub.seed = getattr(env_ns, "seed", 123) - sub.compiler = getattr(env_ns, "compiler", "inductor") - sub.device = getattr(env_ns, "device", "cuda") - sub.op_lib = getattr(env_ns, "op_lib", None) - sub.warmup = getattr(env_ns, "warmup", 3) - sub.trials = getattr(env_ns, "trials", 5) - sub.log_prompt = getattr(env_ns, "log_prompt", "graph-net-bench-log") - sub.model_path_prefix = getattr(env_ns, "model_path_prefix", None) - sub.backend_config = getattr(env_ns, "backend_config", None) + sub.seed = getattr(config, "seed", 123) + sub.compiler = getattr(config, "compiler", "inductor") + sub.device = getattr(config, "device", "cuda") + sub.op_lib = getattr(config, "op_lib", None) + sub.warmup = getattr(config, "warmup", 3) + sub.trials = getattr(config, "trials", 5) + sub.log_prompt = getattr(config, "log_prompt", "graph-net-bench-log") + sub.model_path_prefix = getattr(config, "model_path_prefix", None) + sub.backend_config = getattr(config, "backend_config", None) return sub def main(args): - config_dict = convert_to_dict(args.config) + config_dict = test_compiler_util.convert_to_dict(args.config) model_path_prefix = config_dict.get("reference_config", {}).get("model_path_prefix") if args.model_path_list and model_path_prefix: diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index 4d5ea94a5..3774d4176 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -11,7 +11,6 @@ import random import numpy as np import platform -import base64 from contextlib import redirect_stdout, redirect_stderr from graph_net_bench.torch.backend.graph_compiler_backend import GraphCompilerBackend from graph_net_bench import test_compiler_util @@ -75,20 +74,11 @@ def load_class_from_file( return model_class -def convert_to_dict(config_str): - if config_str is None or config_str == "None": - return {} - config_str = base64.b64decode(config_str).decode("utf-8") - config = json.loads(config_str) - assert isinstance(config, dict), f"config should be a dict. 
{config_str=}" - return config - - -def get_compiler_backend(args) -> GraphCompilerBackend: +def get_compiler_backend(config) -> GraphCompilerBackend: """ - Dynamically load backend class based on args.compiler + Dynamically load backend class based on config.compiler """ - compiler_name = args.compiler.lower() + compiler_name = config.compiler.lower() module_name = f"graph_net_bench.torch.backend.{compiler_name}_backend" try: @@ -108,56 +98,58 @@ def get_compiler_backend(args) -> GraphCompilerBackend: raise ImportError(f"Failed to import backend module for '{compiler_name}': {e}") backend_config = ( - convert_to_dict(args.backend_config) if args.backend_config is not None else {} + test_compiler_util.convert_to_dict(config.backend_config) + if config.backend_config is not None + else {} ) return backend_class(backend_config) -def get_model(args): - device = "xla" if args.compiler == "xla" else args.device +def get_model(model_path, config): + device = "xla" if config.compiler == "xla" else config.device # device: Torch device object specifying the target device for model loading (e.g., 'cuda', 'cpu', 'xla') model_class = load_class_from_file( - args.model_path, class_name="GraphModule", device=device + model_path, class_name="GraphModule", device=device ) - model = model_class().to(torch.device(args.device)) + model = model_class().to(torch.device(config.device)) return model -def get_input_dict(args): - inputs_params = utils.load_converted_from_text(f"{args.model_path}") +def get_input_dict(model_path, config): + inputs_params = utils.load_converted_from_text(f"{model_path}") params = inputs_params["weight_info"] for tensor_meta in params.values(): if "device" in tensor_meta["info"]: - tensor_meta["info"]["device"] = args.device + tensor_meta["info"]["device"] = config.device return { - k: utils.replay_tensor(v).to(torch.device(args.device)) + k: utils.replay_tensor(v).to(torch.device(config.device)) for k, v in params.items() } -def measure_performance(model_call, args, compiler): +def measure_performance(model_call, config, compiler): stats = {} outs = model_call() # Warmup runs - for _ in range(args.warmup): + for _ in range(config.warmup): model_call() compiler.synchronize() - hardware_name = get_hardward_name(args.device) + hardware_name = get_hardward_name(config.device) print( - f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {args.trials}", + f"[Profiling] Using device: {config.device} {hardware_name}, warm up {config.warmup}, trials {config.trials}", file=sys.stderr, flush=True, ) - if "cuda" in args.device: + if "cuda" in config.device: torch.cuda.empty_cache() e2e_times = [] gpu_times = [] - for i in range(args.trials): + for i in range(config.trials): # End-to-end timing (naive_timer) duration_box = test_compiler_util.DurationBox(-1) with test_compiler_util.naive_timer(duration_box, compiler.synchronize): @@ -185,7 +177,7 @@ def measure_performance(model_call, args, compiler): else: # CPU or other devices e2e_times = [] - for i in range(args.trials): + for i in range(config.trials): duration_box = test_compiler_util.DurationBox(-1) with test_compiler_util.naive_timer(duration_box, compiler.synchronize): model_call() @@ -200,34 +192,27 @@ def measure_performance(model_call, args, compiler): return outs, stats -def eval_single_model_with_single_backend(args): - set_seed(args.seed) - os.makedirs(args.output_path, exist_ok=True) - log_path = utils.get_log_path(args.output_path, args.model_path) - output_dump_path = 
utils.get_output_path(args.output_path, args.model_path) +def eval_single_model_with_single_backend(model_path, output_path, config): + set_seed(config.seed) + os.makedirs(output_path, exist_ok=True) + log_path = utils.get_log_path(output_path, model_path) + output_dump_path = utils.get_output_path(output_path, model_path) print(f"Log path: {log_path}", file=sys.stderr, flush=True) print(f"Outputs path: {output_dump_path}", file=sys.stderr, flush=True) with open(log_path, "w", encoding="utf-8") as log_f: with redirect_stdout(log_f), redirect_stderr(log_f): - compiler = get_compiler_backend(args) + compiler = get_compiler_backend(config) - input_dict = get_input_dict(args) - model = get_model(args) + input_dict = get_input_dict(model_path, config) + model = get_model(model_path, config) model.eval() - test_compiler_util.print_with_log_prompt( - "[Config] seed:", args.seed, args.log_prompt - ) - - test_compiler_util.print_basic_config( - args, - get_hardward_name(args.device), - get_compiler_version(args.compiler), - ) - - test_compiler_util.print_with_log_prompt( - "[Config] op_lib:", args.op_lib, args.log_prompt + test_compiler_util.print_config( + model_path, + config, + get_hardward_name(config.device), + get_compiler_version(config.compiler), ) success = False @@ -238,7 +223,7 @@ def eval_single_model_with_single_backend(args): def model_call(): return compiled_model(**input_dict) - outputs, time_stats = measure_performance(model_call, args, compiler) + outputs, time_stats = measure_performance(model_call, config, compiler) success = True except Exception as e: print( @@ -247,11 +232,11 @@ def model_call(): flush=True, ) - test_compiler_util.print_running_status(args, success) + test_compiler_util.print_running_status(config, success) if success: torch.save(outputs, str(output_dump_path)) test_compiler_util.print_with_log_prompt( - "[Performance][eager]:", json.dumps(time_stats), args.log_prompt + "[Performance][eager]:", json.dumps(time_stats), config.log_prompt ) with open(log_path, "r", encoding="utf-8") as f: @@ -277,42 +262,6 @@ def model_call(): default="/tmp/test_save", help="Path to save outputs", ) - parser.add_argument("--seed", type=int, required=False, default=123) - parser.add_argument( - "--compiler", - type=str, - required=False, - default="inductor", - help="Path to customized compiler python file", - ) - parser.add_argument( - "--device", - type=str, - required=False, - default="cuda", - help="Device for testing the compiler (e.g., 'cpu' or 'cuda')", - ) - parser.add_argument("--op-lib", type=str, required=False, default=None) - parser.add_argument( - "--warmup", type=int, required=False, default=3, help="Number of warmup steps" - ) - parser.add_argument( - "--trials", type=int, required=False, default=5, help="Number of timing trials" - ) - parser.add_argument( - "--log-prompt", - type=str, - required=False, - default="graph-net-bench-log", - help="Log prompt for performance log filtering.", - ) - parser.add_argument( - "--model-path-prefix", - type=str, - required=False, - default=None, - help="Prefix path to model path list", - ) parser.add_argument( "--config", type=str, @@ -321,4 +270,8 @@ def model_call(): help="base64 encode configuration json.", ) args = parser.parse_args() - eval_single_model_with_single_backend(args=args) + eval_single_model_with_single_backend( + args.model_path, + args.output_path, + **test_compiler_util.convert_to_dict(args.config), + ) From db877bdb37c4cf20ca29414340d6a5707f072be9 Mon Sep 17 00:00:00 2001 From: JewelRoam 
<2752594773@qq.com> Date: Fri, 16 Jan 2026 18:11:20 +0800 Subject: [PATCH 10/20] Add check_and_complete_args --- graph_net_bench/test_compiler_util.py | 22 ++--- graph_net_bench/torch/eval_backend_diff.py | 73 ++++++--------- graph_net_bench/torch/eval_backend_perf.py | 100 +++++++++++++-------- 3 files changed, 101 insertions(+), 94 deletions(-) diff --git a/graph_net_bench/test_compiler_util.py b/graph_net_bench/test_compiler_util.py index de38a29fa..44ccc703e 100644 --- a/graph_net_bench/test_compiler_util.py +++ b/graph_net_bench/test_compiler_util.py @@ -157,21 +157,21 @@ def print_basic_config(args, hardware_name, compile_framework_version): ) -def print_config(model_path, config, hardware_name, compiler_version): - model_path = os.path.normpath(model_path) +def print_config(args, hardware_name, compiler_version): + model_path = os.path.normpath(args.model_path) model_name = get_model_name(model_path) - print_with_log_prompt("[Config] model:", model_name, config.log_prompt) - print_with_log_prompt("[Config] seed:", config.seed, config.log_prompt) - print_with_log_prompt("[Config] device:", config.device, config.log_prompt) - print_with_log_prompt("[Config] hardware:", hardware_name, config.log_prompt) - print_with_log_prompt("[Config] op_lib:", config.op_lib, config.log_prompt) - print_with_log_prompt("[Config] compiler:", config.compiler, config.log_prompt) - print_with_log_prompt("[Config] warmup:", config.warmup, config.log_prompt) - print_with_log_prompt("[Config] trials:", config.trials, config.log_prompt) + print_with_log_prompt("[Config] model:", model_name, args.log_prompt) + print_with_log_prompt("[Config] seed:", args.seed, args.log_prompt) + print_with_log_prompt("[Config] device:", args.device, args.log_prompt) + print_with_log_prompt("[Config] hardware:", hardware_name, args.log_prompt) + print_with_log_prompt("[Config] op_lib:", args.op_lib, args.log_prompt) + print_with_log_prompt("[Config] compiler:", args.compiler, args.log_prompt) + print_with_log_prompt("[Config] warmup:", args.warmup, args.log_prompt) + print_with_log_prompt("[Config] trials:", args.trials, args.log_prompt) print_with_log_prompt( "[Config] compile_framework_version:", compiler_version, - config.log_prompt, + args.log_prompt, ) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index 6f8dc550b..c230f6bd8 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -146,10 +146,9 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False): try: single_model_args = argparse.Namespace() - single_model_args.model_path = model_path - single_model_args.config = args.config single_model_args.model_path_list = None + single_model_args.config = args.config if path_utils.is_single_model_dir(model_path): eval_single_model(single_model_args) @@ -158,8 +157,8 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False): for submodel_path in submodel_paths: sub_args = argparse.Namespace() sub_args.model_path = submodel_path - sub_args.config = args.config sub_args.model_path_list = None + sub_args.config = args.config eval_single_model(sub_args) cmd_ret = 0 except KeyboardInterrupt: @@ -184,60 +183,44 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False): print(f"- {model_path}", file=sys.stderr, flush=True) -def compare_perf_diff(args, model_path, ref_dir, target_dir): +def eval_single_model(args): + ref_dir = "/tmp/eval_perf_diff/A" + target_dir = 
"/tmp/eval_perf_diff/B" + + ref_args = types.SimpleNamespace( + model_path=args.model_path, + output_path=ref_dir, + **test_compiler_util.convert_to_dict(args.config)["reference_config"], + ) + target_args = types.SimpleNamespace( + model_path=args.model_path, + output_path=target_dir, + **test_compiler_util.convert_to_dict(args.config)["target_config"], + ) + + eval_single_model_with_single_backend(ref_args) + eval_single_model_with_single_backend(target_args) + + # compare_perf_diff # A - ref_dump_path = utils.get_output_path(ref_dir, model_path) + ref_dump_path = utils.get_output_path(ref_dir, args.model_path) ref_out = torch.load(str(ref_dump_path)) - ref_log_path = utils.get_log_path(ref_dir, model_path) + ref_log_path = utils.get_log_path(ref_dir, args.model_path) ref_time_stats = parse_time_stats_from_reference_log(ref_log_path) # B - target_dump_path = utils.get_output_path(target_dir, model_path) + target_dump_path = utils.get_output_path(target_dir, args.model_path) target_out = torch.load(str(target_dump_path)) - target_log_path = utils.get_log_path(target_dir, model_path) + target_log_path = utils.get_log_path(target_dir, args.model_path) target_time_stats = parse_time_stats_from_reference_log(target_log_path) - compare_correctness(ref_out, target_out, args) - - test_compiler_util.print_times_and_speedup(args, ref_time_stats, target_time_stats) - - -def eval_single_model(args): - ref_dir = "/tmp/eval_perf_diff/A" - target_dir = "/tmp/eval_perf_diff/B" - - EvalCfg = types.SimpleNamespace( - reference_config=types.SimpleNamespace( - **test_compiler_util.convert_to_dict(args.config)["reference_config"] - ), - target_config=types.SimpleNamespace( - **test_compiler_util.convert_to_dict(args.config)["target_config"] - ), + compare_correctness(ref_out, target_out, ref_args) + test_compiler_util.print_times_and_speedup( + ref_args, ref_time_stats, target_time_stats ) - reference_config = build_sub_config(EvalCfg.reference_config) - target_config = build_sub_config(EvalCfg.target_config) - - eval_single_model_with_single_backend(args.model_path, ref_dir, reference_config) - eval_single_model_with_single_backend(args.model_path, target_dir, target_config) - compare_perf_diff(reference_config, args.model_path, ref_dir, target_dir) - - -def build_sub_config(config): - sub = argparse.Namespace() - sub.seed = getattr(config, "seed", 123) - sub.compiler = getattr(config, "compiler", "inductor") - sub.device = getattr(config, "device", "cuda") - sub.op_lib = getattr(config, "op_lib", None) - sub.warmup = getattr(config, "warmup", 3) - sub.trials = getattr(config, "trials", 5) - sub.log_prompt = getattr(config, "log_prompt", "graph-net-bench-log") - sub.model_path_prefix = getattr(config, "model_path_prefix", None) - sub.backend_config = getattr(config, "backend_config", None) - return sub - def main(args): config_dict = test_compiler_util.convert_to_dict(args.config) diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index 3774d4176..5c8586f30 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -11,6 +11,7 @@ import random import numpy as np import platform +import types from contextlib import redirect_stdout, redirect_stderr from graph_net_bench.torch.backend.graph_compiler_backend import GraphCompilerBackend from graph_net_bench import test_compiler_util @@ -74,11 +75,11 @@ def load_class_from_file( return model_class -def get_compiler_backend(config) -> GraphCompilerBackend: +def 
get_compiler_backend(args) -> GraphCompilerBackend: """ - Dynamically load backend class based on config.compiler + Dynamically load backend class based on args.compiler """ - compiler_name = config.compiler.lower() + compiler_name = args.compiler.lower() module_name = f"graph_net_bench.torch.backend.{compiler_name}_backend" try: @@ -98,58 +99,57 @@ def get_compiler_backend(config) -> GraphCompilerBackend: raise ImportError(f"Failed to import backend module for '{compiler_name}': {e}") backend_config = ( - test_compiler_util.convert_to_dict(config.backend_config) - if config.backend_config is not None + test_compiler_util.convert_to_dict(args.backend_config) + if args.backend_config is not None else {} ) return backend_class(backend_config) -def get_model(model_path, config): - device = "xla" if config.compiler == "xla" else config.device +def get_model(args): + device = "xla" if args.compiler == "xla" else args.device # device: Torch device object specifying the target device for model loading (e.g., 'cuda', 'cpu', 'xla') model_class = load_class_from_file( - model_path, class_name="GraphModule", device=device + args.model_path, class_name="GraphModule", device=device ) - model = model_class().to(torch.device(config.device)) + model = model_class().to(torch.device(args.device)) return model -def get_input_dict(model_path, config): - inputs_params = utils.load_converted_from_text(f"{model_path}") +def get_input_dict(args): + inputs_params = utils.load_converted_from_text(f"{args.model_path}") params = inputs_params["weight_info"] for tensor_meta in params.values(): if "device" in tensor_meta["info"]: - tensor_meta["info"]["device"] = config.device + tensor_meta["info"]["device"] = args.device return { - k: utils.replay_tensor(v).to(torch.device(config.device)) + k: utils.replay_tensor(v).to(torch.device(args.device)) for k, v in params.items() } -def measure_performance(model_call, config, compiler): +def measure_performance(model_call, args, compiler): stats = {} outs = model_call() # Warmup runs - for _ in range(config.warmup): + for _ in range(args.warmup): model_call() compiler.synchronize() - hardware_name = get_hardward_name(config.device) print( - f"[Profiling] Using device: {config.device} {hardware_name}, warm up {config.warmup}, trials {config.trials}", + f"[Profiling] Warm up {args.warmup}, Trials {args.trials}", file=sys.stderr, flush=True, ) - if "cuda" in config.device: + if "cuda" in args.device: torch.cuda.empty_cache() e2e_times = [] gpu_times = [] - for i in range(config.trials): + for i in range(args.trials): # End-to-end timing (naive_timer) duration_box = test_compiler_util.DurationBox(-1) with test_compiler_util.naive_timer(duration_box, compiler.synchronize): @@ -177,7 +177,7 @@ def measure_performance(model_call, config, compiler): else: # CPU or other devices e2e_times = [] - for i in range(config.trials): + for i in range(args.trials): duration_box = test_compiler_util.DurationBox(-1) with test_compiler_util.naive_timer(duration_box, compiler.synchronize): model_call() @@ -192,27 +192,27 @@ def measure_performance(model_call, config, compiler): return outs, stats -def eval_single_model_with_single_backend(model_path, output_path, config): - set_seed(config.seed) - os.makedirs(output_path, exist_ok=True) - log_path = utils.get_log_path(output_path, model_path) - output_dump_path = utils.get_output_path(output_path, model_path) +def eval_single_model_with_single_backend(args): + check_and_complete_args(args) + set_seed(args.seed) + os.makedirs(args.output_path, 
exist_ok=True) + log_path = utils.get_log_path(args.output_path, args.model_path) + output_dump_path = utils.get_output_path(args.output_path, args.model_path) print(f"Log path: {log_path}", file=sys.stderr, flush=True) print(f"Outputs path: {output_dump_path}", file=sys.stderr, flush=True) with open(log_path, "w", encoding="utf-8") as log_f: with redirect_stdout(log_f), redirect_stderr(log_f): - compiler = get_compiler_backend(config) + compiler = get_compiler_backend(args) - input_dict = get_input_dict(model_path, config) - model = get_model(model_path, config) + input_dict = get_input_dict(args) + model = get_model(args) model.eval() test_compiler_util.print_config( - model_path, - config, - get_hardward_name(config.device), - get_compiler_version(config.compiler), + args, + get_hardward_name(args.device), + get_compiler_version(args.compiler), ) success = False @@ -223,7 +223,7 @@ def eval_single_model_with_single_backend(model_path, output_path, config): def model_call(): return compiled_model(**input_dict) - outputs, time_stats = measure_performance(model_call, config, compiler) + outputs, time_stats = measure_performance(model_call, args, compiler) success = True except Exception as e: print( @@ -232,11 +232,11 @@ def model_call(): flush=True, ) - test_compiler_util.print_running_status(config, success) + test_compiler_util.print_running_status(args, success) if success: torch.save(outputs, str(output_dump_path)) test_compiler_util.print_with_log_prompt( - "[Performance][eager]:", json.dumps(time_stats), config.log_prompt + "[Performance][eager]:", json.dumps(time_stats), args.log_prompt ) with open(log_path, "r", encoding="utf-8") as f: @@ -244,6 +244,29 @@ def model_call(): print(content, file=sys.stderr, flush=True) +def check_and_complete_args(args): + """ + Ensure all required arguments are present with default values if missing + """ + defaults = { + "model_path": None, # Model path + "output_path": None, # Log and output directory + "seed": 123, # Random seed + "compiler": "inductor", # Compiler name + "device": "cuda", # Device for testing the compiler (e.g., 'cpu' or 'cuda') + "op_lib": None, # Operator library + "warmup": 3, # Number of warmup steps + "trials": 5, # Number of timing trials + "log_prompt": "graph-net-bench-log", # Log prompt for performance log filtering + "model_path_prefix": None, # Prefix path to model path in args.model-path + "backend_config": None, # backend configuration json + } + + for key, default in defaults.items(): + if not hasattr(args, key): + setattr(args, key, default) + + if __name__ == "__main__": parser = argparse.ArgumentParser( description="Single Backend Performance Evaluation" @@ -270,8 +293,9 @@ def model_call(): help="base64 encode configuration json.", ) args = parser.parse_args() - eval_single_model_with_single_backend( - args.model_path, - args.output_path, + mut_args = types.SimpleNamespace( + model_path=args.model_path, + output_path=args.output_path, **test_compiler_util.convert_to_dict(args.config), ) + eval_single_model_with_single_backend(mut_args) From 0e6ec45faf2fe026640c7535e0ed4d2e567dfe02 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 18:25:24 +0800 Subject: [PATCH 11/20] Simplify --- graph_net_bench/torch/eval_backend_diff.py | 78 +++++++++++----------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index c230f6bd8..ecafb71ae 100755 --- 
a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -109,72 +109,74 @@ def parse_time_stats_from_reference_log(log_path): return time_stats -def eval_multi_models(args, model_path_prefix=None, use_model_list=False): - sample_idx = 0 - failed_samples = [] - module_name = os.path.splitext(os.path.basename(__file__))[0] - +def _get_model_paths(args, model_path_prefix, use_model_list): if use_model_list: - assert os.path.isdir(model_path_prefix) - assert os.path.isfile(args.model_path_list) + assert os.path.isdir(model_path_prefix) and os.path.isfile(args.model_path_list) + test_samples = test_compiler_util.get_allow_samples( args.model_path_list, model_path_prefix ) - model_paths = [] - for rel_model_path in test_samples: - model_path = os.path.join(model_path_prefix, rel_model_path) - if os.path.exists(model_path) and os.path.exists( - os.path.join(model_path, "model.py") - ): - model_paths.append(model_path) + model_paths = [ + os.path.join(model_path_prefix, rel_model_path) + for rel_model_path in test_samples + if os.path.exists( + os.path.join(model_path_prefix, rel_model_path, "model.py") + ) + ] else: assert os.path.isdir(args.model_path) + test_samples = test_compiler_util.get_allow_samples( args.model_path_list, model_path_prefix ) - model_paths = [] - for model_path in path_utils.get_recursively_model_path(args.model_path): - if test_samples is None or os.path.abspath(model_path) in test_samples: - model_paths.append(model_path) + model_paths = [ + model_path + for model_path in path_utils.get_recursively_model_path(args.model_path) + if test_samples is None or os.path.abspath(model_path) in test_samples + ] + + return model_paths + - for model_path in model_paths: +def _create_model_args(model_path, config): + args = argparse.Namespace() + args.model_path = model_path + args.model_path_list = None + args.config = config + return args + + +def eval_multi_models(args, model_path_prefix=None, use_model_list=False): + module_name = os.path.splitext(os.path.basename(__file__))[0] + + model_paths = _get_model_paths(args, model_path_prefix, use_model_list) + failed_samples = [] + for sample_idx, model_path in enumerate(model_paths): print( f"[{sample_idx}] {module_name}, model_path: {model_path}", file=sys.stderr, flush=True, ) - try: - single_model_args = argparse.Namespace() - single_model_args.model_path = model_path - single_model_args.model_path_list = None - single_model_args.config = args.config - if path_utils.is_single_model_dir(model_path): - eval_single_model(single_model_args) + eval_single_model(_create_model_args(model_path, args.config)) else: - submodel_paths = path_utils.get_recursively_model_path(model_path) - for submodel_path in submodel_paths: - sub_args = argparse.Namespace() - sub_args.model_path = submodel_path - sub_args.model_path_list = None - sub_args.config = args.config - eval_single_model(sub_args) - cmd_ret = 0 + for submodel_path in path_utils.get_recursively_model_path(model_path): + eval_single_model(_create_model_args(submodel_path, args.config)) + success = True except KeyboardInterrupt: print("KeyboardInterrupt") sys.exit(1) except Exception: print("\n--- Full Traceback ---") traceback.print_exc() - cmd_ret = 1 + success = False - if cmd_ret != 0: + if not success: failed_samples.append(model_path) - sample_idx += 1 print( - f"Totally {sample_idx} verified samples, failed {len(failed_samples)} samples.", + f"Totally {len(model_paths)} verified samples, failed {len(failed_samples)} samples.", 
file=sys.stderr, flush=True, ) From a5fa17369258592c16abdd0ef69a47a92c5f677c Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Tue, 20 Jan 2026 13:57:16 +0800 Subject: [PATCH 12/20] modify args.config to separate args.reference_config and args.target_config --- graph_net_bench/torch/eval_backend_diff.py | 39 +++++++++++++--------- test/eval_backend_diff_test.sh | 31 ++++++++--------- 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index ecafb71ae..c254eafaf 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -138,11 +138,12 @@ def _get_model_paths(args, model_path_prefix, use_model_list): return model_paths -def _create_model_args(model_path, config): +def _create_model_args(model_path, reference_config, target_config): args = argparse.Namespace() args.model_path = model_path args.model_path_list = None - args.config = config + args.reference_config = reference_config + args.target_config = target_config return args @@ -157,12 +158,15 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False): file=sys.stderr, flush=True, ) + + model_args = argparse.Namespace() + model_args.model_path = model_path + model_args.model_path_list = None + model_args.reference_config = args.reference_config + model_args.target_config = args.target_config + try: - if path_utils.is_single_model_dir(model_path): - eval_single_model(_create_model_args(model_path, args.config)) - else: - for submodel_path in path_utils.get_recursively_model_path(model_path): - eval_single_model(_create_model_args(submodel_path, args.config)) + eval_single_model(model_args) success = True except KeyboardInterrupt: print("KeyboardInterrupt") @@ -192,12 +196,12 @@ def eval_single_model(args): ref_args = types.SimpleNamespace( model_path=args.model_path, output_path=ref_dir, - **test_compiler_util.convert_to_dict(args.config)["reference_config"], + **test_compiler_util.convert_to_dict(args.reference_config), ) target_args = types.SimpleNamespace( model_path=args.model_path, output_path=target_dir, - **test_compiler_util.convert_to_dict(args.config)["target_config"], + **test_compiler_util.convert_to_dict(args.target_config), ) eval_single_model_with_single_backend(ref_args) @@ -225,8 +229,8 @@ def eval_single_model(args): def main(args): - config_dict = test_compiler_util.convert_to_dict(args.config) - model_path_prefix = config_dict.get("reference_config", {}).get("model_path_prefix") + ref_config = test_compiler_util.convert_to_dict(args.reference_config) + model_path_prefix = ref_config.get("model_path_prefix") if args.model_path_list and model_path_prefix: eval_multi_models(args, model_path_prefix, use_model_list=True) @@ -258,11 +262,16 @@ def main(args): help="Path to samples list, each line contains a sample path", ) parser.add_argument( - "--config", + "--reference-config", type=str, - required=False, - default=None, - help="base64 encode configuration json.", + required=True, + help="base64 encode reference config json.", + ) + parser.add_argument( + "--target-config", + type=str, + required=True, + help="base64 encode target config json.", ) args = parser.parse_args() main(args=args) diff --git a/test/eval_backend_diff_test.sh b/test/eval_backend_diff_test.sh index 17bba712e..1eaca5ecd 100755 --- a/test/eval_backend_diff_test.sh +++ b/test/eval_backend_diff_test.sh @@ -8,22 +8,23 @@ 
model_list="$AI4C_ROOT/test/workspace_eval_backend_diff/sample_list.txt" python3 -m graph_net_bench.torch.eval_backend_diff \ --model-path-list $model_list \ - --config $(base64 -w 0 <&1 | tee "$OUTPUT_PATH/validation.log" From 0c9e07b8d93e9f1aba28569a86995edab583b383 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Tue, 20 Jan 2026 14:24:51 +0800 Subject: [PATCH 13/20] reuse some code --- graph_net/torch/test_reference_device.py | 26 ++++-------------------- graph_net/torch/test_target_device.py | 22 +++++++------------- 2 files changed, 11 insertions(+), 37 deletions(-) diff --git a/graph_net/torch/test_reference_device.py b/graph_net/torch/test_reference_device.py index f022d2ba5..33d0ec8e4 100644 --- a/graph_net/torch/test_reference_device.py +++ b/graph_net/torch/test_reference_device.py @@ -11,30 +11,12 @@ from graph_net_bench import test_compiler_util from graph_net import model_path_util from graph_net_bench.torch import test_compiler - - -def get_reference_log_path(reference_dir, model_path): - model_name = model_path.split("torch_samples/")[-1].replace(os.sep, "_") - return os.path.join(reference_dir, f"{model_name}.log") - - -def get_reference_output_path(reference_dir, model_path): - model_name = model_path.split("torch_samples/")[-1].replace(os.sep, "_") - return os.path.join(reference_dir, f"{model_name}.pth") - - -def register_op_lib(op_lib): - if op_lib == "flaggems": - import flag_gems - - flag_gems.enable() - else: - pass +from graph_net_bench.torch import utils, eval_backend_perf def test_single_model(args): - ref_log = get_reference_log_path(args.reference_dir, args.model_path) - ref_dump = get_reference_output_path(args.reference_dir, args.model_path) + ref_log = utils.get_log_path(args.reference_dir, args.model_path) + ref_dump = utils.get_output_path(args.reference_dir, args.model_path) print(f"Reference log path: {ref_log}", file=sys.stderr, flush=True) print(f"Reference outputs path: {ref_dump}", file=sys.stderr, flush=True) @@ -149,7 +131,7 @@ def main(args): ref_dump_dir.mkdir(parents=True, exist_ok=True) if path_utils.is_single_model_dir(args.model_path): - register_op_lib(args.op_lib) + eval_backend_perf.register_op_lib(args.op_lib) test_single_model(args) else: test_multi_models(args) diff --git a/graph_net/torch/test_target_device.py b/graph_net/torch/test_target_device.py index ec2085a32..cf56dee69 100644 --- a/graph_net/torch/test_target_device.py +++ b/graph_net/torch/test_target_device.py @@ -8,7 +8,7 @@ from graph_net_bench import path_utils from graph_net_bench import test_compiler_util from graph_net import model_path_util -from graph_net_bench.torch import test_compiler, test_reference_device +from graph_net_bench.torch import test_compiler, utils, eval_backend_perf def parse_config_from_reference_log(log_path): @@ -46,9 +46,7 @@ def parse_time_stats_from_reference_log(log_path): def update_args_and_set_seed(args, model_path): - ref_log = test_reference_device.get_reference_log_path( - args.reference_dir, model_path - ) + ref_log = utils.get_log_path(args.reference_dir, model_path) config = parse_config_from_reference_log(ref_log) vars(args)["model_path"] = model_path vars(args)["compiler"] = config.get("compiler") @@ -100,14 +98,10 @@ def model_call(): if test_compiler_util.get_subgraph_tag(args.model_path): model_name += "_" + test_compiler_util.get_subgraph_tag(args.model_path) - ref_dump = test_reference_device.get_reference_output_path( - args.reference_dir, args.model_path - ) + ref_dump = 
utils.get_output_path(args.reference_dir, args.model_path) ref_out = torch.load(str(ref_dump)) - ref_log = test_reference_device.get_reference_log_path( - args.reference_dir, args.model_path - ) + ref_log = utils.get_log_path(args.reference_dir, args.model_path) ref_time_stats = parse_time_stats_from_reference_log(ref_log) if success: @@ -117,7 +111,7 @@ def model_call(): def is_reference_log_exist(reference_dir, model_path): - log_path = test_reference_device.get_reference_log_path(reference_dir, model_path) + log_path = utils.get_log_path(reference_dir, model_path) return os.path.isfile(log_path) @@ -171,16 +165,14 @@ def main(args): if path_utils.is_single_model_dir(args.model_path): if args.op_lib == "origin": - ref_log = test_reference_device.get_reference_log_path( - args.reference_dir, args.model_path - ) + ref_log = utils.get_log_path(args.reference_dir, args.model_path) config = parse_config_from_reference_log(ref_log) vars(args)["op_lib"] = config.get("op_lib") test_compiler_util.print_with_log_prompt( "[Config] op_lib:", args.op_lib, args.log_prompt ) else: - test_reference_device.register_op_lib(args.op_lib) + eval_backend_perf.register_op_lib(args.op_lib) args = update_args_and_set_seed(args, args.model_path) test_single_model(args) From ebd46af74be6fb0ee7828cb4eca27754afcf1a11 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Tue, 20 Jan 2026 14:37:02 +0800 Subject: [PATCH 14/20] Add unittest on test device; minor fix --- graph_net/torch/test_reference_device.py | 2 +- test/eval_device_diff_test.sh | 37 ++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100755 test/eval_device_diff_test.sh diff --git a/graph_net/torch/test_reference_device.py b/graph_net/torch/test_reference_device.py index 33d0ec8e4..6a28095e4 100644 --- a/graph_net/torch/test_reference_device.py +++ b/graph_net/torch/test_reference_device.py @@ -119,7 +119,7 @@ def test_multi_models(args): def main(args): assert os.path.isdir(args.model_path) # Support all torch compilers - valid_compilers = list(test_compiler.registry_backend.keys()) + valid_compilers = list(test_compiler.compiler_backend_name2class.keys()) assert ( args.compiler in valid_compilers ), f"Compiler must be one of {valid_compilers}" diff --git a/test/eval_device_diff_test.sh b/test/eval_device_diff_test.sh new file mode 100755 index 000000000..10e0ab766 --- /dev/null +++ b/test/eval_device_diff_test.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +AI4C_ROOT=$(python3 -c "import graph_net_bench; import os; print(os.path.dirname(os.path.dirname(graph_net_bench.__file__)))") +OUTPUT_PATH=/tmp/workspace_eval_device_diff_test +REFERENCE_DIR="$OUTPUT_PATH/reference" + +mkdir -p "$OUTPUT_PATH" +mkdir -p "$REFERENCE_DIR" + +MODEL_PATH="$AI4C_ROOT/samples/ultralytics/yolov3-tinyu" + +echo "==========================================" +echo "Step 1: Generate reference on device A (simulated)" +echo "==========================================" +python3 -m graph_net.torch.test_reference_device \ + --model-path "$MODEL_PATH" \ + --compiler nope \ + --device cuda \ + --warmup 1 \ + --trials 1 \ + --reference-dir "$REFERENCE_DIR" \ + 2>&1 | tee "$OUTPUT_PATH/reference.log" + +echo "" +echo "==========================================" +echo "Step 2: Compare on device B (simulated)" +echo "==========================================" +python3 -m graph_net.torch.test_target_device \ + --model-path "$MODEL_PATH" \ + --device cuda \ + --reference-dir "$REFERENCE_DIR" \ + 2>&1 | tee "$OUTPUT_PATH/target.log" + +echo "" +echo 
"==========================================" +echo "Test completed. Logs saved to: $OUTPUT_PATH" +echo "==========================================" \ No newline at end of file From 74b5238ef64ed5a1c08eceed44bbe0e77cc3f72c Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Tue, 20 Jan 2026 15:15:45 +0800 Subject: [PATCH 15/20] reuse eval_backend_perf, eval_backend_diff instead of test_compiler in test_device --- graph_net/torch/test_reference_device.py | 96 +++++-------------- graph_net/torch/test_target_device.py | 114 +++++++++-------------- 2 files changed, 66 insertions(+), 144 deletions(-) diff --git a/graph_net/torch/test_reference_device.py b/graph_net/torch/test_reference_device.py index 6a28095e4..bb80c1e8c 100644 --- a/graph_net/torch/test_reference_device.py +++ b/graph_net/torch/test_reference_device.py @@ -1,76 +1,33 @@ import argparse -import torch import os -from pathlib import Path -from contextlib import redirect_stdout, redirect_stderr -import json import sys -import traceback +import types +from pathlib import Path from graph_net_bench import path_utils -from graph_net_bench import test_compiler_util from graph_net import model_path_util -from graph_net_bench.torch import test_compiler -from graph_net_bench.torch import utils, eval_backend_perf +from graph_net_bench.torch import eval_backend_perf + + +def convert_args_for_eval_backend(args): + """Convert test_reference_device args to eval_backend_perf args format.""" + return types.SimpleNamespace( + model_path=args.model_path, + output_path=args.reference_dir, + seed=args.seed, + compiler=args.compiler, + device=args.device, + op_lib=args.op_lib, + warmup=args.warmup, + trials=args.trials, + log_prompt=args.log_prompt, + backend_config=getattr(args, "config", None), + ) def test_single_model(args): - ref_log = utils.get_log_path(args.reference_dir, args.model_path) - ref_dump = utils.get_output_path(args.reference_dir, args.model_path) - print(f"Reference log path: {ref_log}", file=sys.stderr, flush=True) - print(f"Reference outputs path: {ref_dump}", file=sys.stderr, flush=True) - - with open(ref_log, "w", encoding="utf-8") as log_f: - with redirect_stdout(log_f), redirect_stderr(log_f): - compiler = test_compiler.get_compiler_backend(args) - - input_dict = test_compiler.get_input_dict(args) - model = test_compiler.get_model(args) - model.eval() - - test_compiler_util.print_with_log_prompt( - "[Config] seed:", args.seed, args.log_prompt - ) - - test_compiler_util.print_basic_config( - args, - test_compiler.get_hardward_name(args), - test_compiler.get_compile_framework_version(args), - ) - - test_compiler_util.print_with_log_prompt( - "[Config] op_lib:", args.op_lib, args.log_prompt - ) - - success = False - time_stats = {} - try: - compiled_model = compiler(model) - - def model_call(): - return compiled_model(**input_dict) - - outputs, time_stats = test_compiler.measure_performance( - model_call, args, compiler - ) - success = True - except Exception as e: - print( - f"Run model failed: {str(e)}\n{traceback.format_exc()}", - file=sys.stderr, - flush=True, - ) - - test_compiler_util.print_running_status(args, success) - if success: - torch.save(outputs, str(ref_dump)) - test_compiler_util.print_with_log_prompt( - "[Performance][eager]:", json.dumps(time_stats), args.log_prompt - ) - - with open(ref_log, "r", encoding="utf-8") as f: - content = f.read() - print(content, file=sys.stderr, flush=True) + eval_args = convert_args_for_eval_backend(args) + 
eval_backend_perf.eval_single_model_with_single_backend(eval_args) def test_multi_models(args): @@ -118,14 +75,9 @@ def test_multi_models(args): def main(args): assert os.path.isdir(args.model_path) - # Support all torch compilers - valid_compilers = list(test_compiler.compiler_backend_name2class.keys()) - assert ( - args.compiler in valid_compilers - ), f"Compiler must be one of {valid_compilers}" - assert args.device in ["cuda"] - - test_compiler.set_seed(random_seed=args.seed) + assert args.device in ["cuda", "cpu"] + + eval_backend_perf.set_seed(args.seed) ref_dump_dir = Path(args.reference_dir) ref_dump_dir.mkdir(parents=True, exist_ok=True) diff --git a/graph_net/torch/test_target_device.py b/graph_net/torch/test_target_device.py index cf56dee69..ee46ceee6 100644 --- a/graph_net/torch/test_target_device.py +++ b/graph_net/torch/test_target_device.py @@ -1,14 +1,13 @@ import argparse import os -import json import sys -import traceback +import types import torch from graph_net_bench import path_utils from graph_net_bench import test_compiler_util from graph_net import model_path_util -from graph_net_bench.torch import test_compiler, utils, eval_backend_perf +from graph_net_bench.torch import utils, eval_backend_perf, eval_backend_diff def parse_config_from_reference_log(log_path): @@ -30,84 +29,55 @@ def parse_config_from_reference_log(log_path): return config -def parse_time_stats_from_reference_log(log_path): - assert os.path.isfile( - log_path - ), f"{log_path} does not exist or is not a regular file." - - with open(log_path, "r", encoding="utf-8") as f: - lines = f.readlines() - for line in reversed(lines): - if "[Performance][eager]" in line: - start = line.find("{") - end = line.rfind("}") - time_stats = json.loads(line[start : end + 1]) - return time_stats - - -def update_args_and_set_seed(args, model_path): +def get_ref_config_from_log(args, model_path): + """Extract config from reference log file.""" ref_log = utils.get_log_path(args.reference_dir, model_path) config = parse_config_from_reference_log(ref_log) - vars(args)["model_path"] = model_path - vars(args)["compiler"] = config.get("compiler") - vars(args)["trials"] = int(config.get("trials")) - vars(args)["warmup"] = int(config.get("warmup")) - test_compiler.set_seed(random_seed=int(config.get("seed"))) - return args - - -def test_single_model(args): - compiler = test_compiler.get_compiler_backend(args) + return config - input_dict = test_compiler.get_input_dict(args) - model = test_compiler.get_model(args) - model.eval() - model_path = os.path.normpath(args.model_path) - test_compiler_util.print_with_log_prompt( - "[Processing]", model_path, args.log_prompt - ) - test_compiler_util.print_basic_config( - args, - test_compiler.get_hardward_name(args), - test_compiler.get_compile_framework_version(args), +def convert_args_for_eval_backend(args, output_path): + """Convert test_target_device args to eval_backend_perf args format.""" + return types.SimpleNamespace( + model_path=args.model_path, + output_path=output_path, + seed=args.seed, + compiler=args.compiler, + device=args.device, + op_lib=args.op_lib, + warmup=args.warmup, + trials=args.trials, + log_prompt=args.log_prompt, + backend_config=getattr(args, "config", None), ) - success = False - time_stats = {} - try: - compiled_model = compiler(model) - def model_call(): - return compiled_model(**input_dict) - - outputs, time_stats = test_compiler.measure_performance( - model_call, args, compiler - ) - success = True - except Exception as e: - print( - f"Run model failed: 
{str(e)}\n{traceback.format_exc()}", - file=sys.stderr, - flush=True, - ) +def test_single_model(args): + target_dir = "/tmp/eval_device_diff/target" - test_compiler_util.print_running_status(args, success) + ref_config = get_ref_config_from_log(args, args.model_path) + vars(args)["compiler"] = ref_config.get("compiler") + vars(args)["trials"] = int(ref_config.get("trials")) + vars(args)["warmup"] = int(ref_config.get("warmup")) + vars(args)["seed"] = int(ref_config.get("seed")) - model_name = test_compiler_util.get_model_name(args.model_path) - if test_compiler_util.get_subgraph_tag(args.model_path): - model_name += "_" + test_compiler_util.get_subgraph_tag(args.model_path) + eval_args = convert_args_for_eval_backend(args, target_dir) + eval_backend_perf.eval_single_model_with_single_backend(eval_args) ref_dump = utils.get_output_path(args.reference_dir, args.model_path) ref_out = torch.load(str(ref_dump)) - ref_log = utils.get_log_path(args.reference_dir, args.model_path) - ref_time_stats = parse_time_stats_from_reference_log(ref_log) + ref_time_stats = eval_backend_diff.parse_time_stats_from_reference_log(ref_log) - if success: - test_compiler.compare_correctness(ref_out, outputs, args) + target_dump = utils.get_output_path(target_dir, args.model_path) + target_out = torch.load(str(target_dump)) + target_log = utils.get_log_path(target_dir, args.model_path) + target_time_stats = eval_backend_diff.parse_time_stats_from_reference_log( + target_log + ) - test_compiler_util.print_times_and_speedup(args, ref_time_stats, time_stats) + eval_backend_diff.compare_correctness(ref_out, target_out, eval_args) + test_compiler_util.print_times_and_speedup(args, ref_time_stats, target_time_stats) def is_reference_log_exist(reference_dir, model_path): @@ -165,16 +135,16 @@ def main(args): if path_utils.is_single_model_dir(args.model_path): if args.op_lib == "origin": - ref_log = utils.get_log_path(args.reference_dir, args.model_path) - config = parse_config_from_reference_log(ref_log) - vars(args)["op_lib"] = config.get("op_lib") - test_compiler_util.print_with_log_prompt( - "[Config] op_lib:", args.op_lib, args.log_prompt + ref_config = get_ref_config_from_log(args, args.model_path) + vars(args)["op_lib"] = ref_config.get("op_lib") + print( + f"{args.log_prompt} [Config] op_lib: {args.op_lib}", + file=sys.stderr, + flush=True, ) else: eval_backend_perf.register_op_lib(args.op_lib) - args = update_args_and_set_seed(args, args.model_path) test_single_model(args) else: test_multi_models(args) From d8514e4a13f67aac1bcd293cbba62cd860008b21 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Tue, 20 Jan 2026 16:48:12 +0800 Subject: [PATCH 16/20] move utest --- .../test/test_device_test.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/eval_device_diff_test.sh => graph_net/test/test_device_test.sh (100%) diff --git a/test/eval_device_diff_test.sh b/graph_net/test/test_device_test.sh similarity index 100% rename from test/eval_device_diff_test.sh rename to graph_net/test/test_device_test.sh From b83b6a967770a644881a6751800ef7e7dc144a28 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Tue, 20 Jan 2026 16:53:13 +0800 Subject: [PATCH 17/20] minor change --- graph_net_bench/torch/eval_backend_diff.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index c254eafaf..cfa171dc6 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ 
b/graph_net_bench/torch/eval_backend_diff.py @@ -190,8 +190,8 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False): def eval_single_model(args): - ref_dir = "/tmp/eval_perf_diff/A" - target_dir = "/tmp/eval_perf_diff/B" + ref_dir = "/tmp/eval_perf_diff/reference" + target_dir = "/tmp/eval_perf_diff/target" ref_args = types.SimpleNamespace( model_path=args.model_path, From 3a7d9baa928c547d258a327ef8cd7237f1da683f Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Tue, 20 Jan 2026 21:45:02 +0800 Subject: [PATCH 18/20] =?UTF-8?q?Add=20local=5Frunner=EF=BC=8Cprocess=5Fru?= =?UTF-8?q?nner=20and=20remote=5Frunner?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- graph_net/torch/test_target_device.py | 6 +- graph_net_bench/test_compiler_util.py | 62 ++++++++ graph_net_bench/torch/eval_backend_diff.py | 85 +++++----- graph_net_bench/torch/runner/__init__.py | 14 ++ graph_net_bench/torch/runner/base_runner.py | 149 ++++++++++++++++++ graph_net_bench/torch/runner/local_runner.py | 99 ++++++++++++ .../torch/runner/process_runner.py | 102 ++++++++++++ graph_net_bench/torch/runner/remote_runner.py | 135 ++++++++++++++++ test/eval_device_diff_test.sh | 38 +++++ 9 files changed, 645 insertions(+), 45 deletions(-) create mode 100644 graph_net_bench/torch/runner/__init__.py create mode 100644 graph_net_bench/torch/runner/base_runner.py create mode 100644 graph_net_bench/torch/runner/local_runner.py create mode 100644 graph_net_bench/torch/runner/process_runner.py create mode 100644 graph_net_bench/torch/runner/remote_runner.py create mode 100755 test/eval_device_diff_test.sh diff --git a/graph_net/torch/test_target_device.py b/graph_net/torch/test_target_device.py index ee46ceee6..88cc9a650 100644 --- a/graph_net/torch/test_target_device.py +++ b/graph_net/torch/test_target_device.py @@ -67,14 +67,12 @@ def test_single_model(args): ref_dump = utils.get_output_path(args.reference_dir, args.model_path) ref_out = torch.load(str(ref_dump)) ref_log = utils.get_log_path(args.reference_dir, args.model_path) - ref_time_stats = eval_backend_diff.parse_time_stats_from_reference_log(ref_log) + ref_time_stats = test_compiler_util.parse_performance_stats(str(ref_log)) target_dump = utils.get_output_path(target_dir, args.model_path) target_out = torch.load(str(target_dump)) target_log = utils.get_log_path(target_dir, args.model_path) - target_time_stats = eval_backend_diff.parse_time_stats_from_reference_log( - target_log - ) + target_time_stats = test_compiler_util.parse_performance_stats(str(target_log)) eval_backend_diff.compare_correctness(ref_out, target_out, eval_args) test_compiler_util.print_times_and_speedup(args, ref_time_stats, target_time_stats) diff --git a/graph_net_bench/test_compiler_util.py b/graph_net_bench/test_compiler_util.py index 44ccc703e..a83f55994 100644 --- a/graph_net_bench/test_compiler_util.py +++ b/graph_net_bench/test_compiler_util.py @@ -7,6 +7,7 @@ import shutil import base64 import numpy as np +from typing import Dict, Any from dataclasses import dataclass from contextlib import contextmanager @@ -381,3 +382,64 @@ def convert_to_dict(config_str): config = json.loads(config_str) assert isinstance(config, dict), f"config should be a dict. 
{config_str=}" return config + + +def convert_to_base64(config_dict): + """Convert a dict to base64 encoded JSON string.""" + if config_dict is None: + return "" + config_str = json.dumps(config_dict) + return base64.b64encode(config_str.encode("utf-8")).decode("utf-8") + + +def parse_performance_stats(log_path: str) -> Dict[str, Any]: + """Parse performance statistics from log file. + + Args: + log_path: Path to the log file + + Returns: + Dictionary containing time statistics + + Raises: + FileNotFoundError: If log_path does not exist + ValueError: If performance data cannot be parsed + """ + if not os.path.isfile(log_path): + raise FileNotFoundError(f"Log file not found: {log_path}") + + with open(log_path, "r", encoding="utf-8") as f: + lines = f.readlines() + + # Search backwards for performance data + for line in reversed(lines): + if "[Performance][eager]" in line: + start = line.find("{") + end = line.rfind("}") + if start != -1 and end != -1: + try: + time_stats = json.loads(line[start : end + 1]) + return time_stats + except json.JSONDecodeError as e: + raise ValueError(f"Failed to parse performance stats: {e}") + + raise ValueError("No performance statistics found in log file") + + +def extract_log_content(log_path: str) -> str: + """Extract and return the entire content of a log file. + + Args: + log_path: Path to the log file + + Returns: + String containing the log content + + Raises: + FileNotFoundError: If log_path does not exist + """ + if not os.path.isfile(log_path): + raise FileNotFoundError(f"Log file not found: {log_path}") + + with open(log_path, "r", encoding="utf-8") as f: + return f.read() diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index cfa171dc6..68e1f2f02 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -1,15 +1,13 @@ -from . import utils import argparse import torch import sys import os import os.path import traceback -import json import types from graph_net_bench import test_compiler_util from graph_net_bench import path_utils -from .eval_backend_perf import eval_single_model_with_single_backend +from .runner import RunnerConfig, RunResult, create_runner def compare_correctness(expected_out, compiled_out, args): @@ -94,21 +92,6 @@ def get_cmp_diff_count(expected_out, compiled_out, atol, rtol): return " ".join(results) -def parse_time_stats_from_reference_log(log_path): - assert os.path.isfile( - log_path - ), f"{log_path} does not exist or is not a regular file." - - with open(log_path, "r", encoding="utf-8") as f: - lines = f.readlines() - for line in reversed(lines): - if "[Performance][eager]" in line: - start = line.find("{") - end = line.rfind("}") - time_stats = json.loads(line[start : end + 1]) - return time_stats - - def _get_model_paths(args, model_path_prefix, use_model_list): if use_model_list: assert os.path.isdir(model_path_prefix) and os.path.isfile(args.model_path_list) @@ -190,41 +173,61 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False): def eval_single_model(args): + """ + Unified evaluation using Runner abstraction. + Supports local, process, and remote execution via runner_type in config. 
+ """ ref_dir = "/tmp/eval_perf_diff/reference" target_dir = "/tmp/eval_perf_diff/target" - ref_args = types.SimpleNamespace( - model_path=args.model_path, - output_path=ref_dir, - **test_compiler_util.convert_to_dict(args.reference_config), + ref_config_dict = test_compiler_util.convert_to_dict(args.reference_config) + target_config_dict = test_compiler_util.convert_to_dict(args.target_config) + + ref_runner_config = RunnerConfig.from_dict(ref_config_dict) + target_runner_config = RunnerConfig.from_dict(target_config_dict) + + ref_runner = create_runner(ref_runner_config) + target_runner = create_runner(target_runner_config) + + print( + f"[eval_backend_diff] Reference runner: {ref_runner_config.strategy.runner_type.value}", + file=sys.stderr, + flush=True, ) - target_args = types.SimpleNamespace( - model_path=args.model_path, - output_path=target_dir, - **test_compiler_util.convert_to_dict(args.target_config), + print( + f"[eval_backend_diff] Target runner: {target_runner_config.strategy.runner_type.value}", + file=sys.stderr, + flush=True, ) - eval_single_model_with_single_backend(ref_args) - eval_single_model_with_single_backend(target_args) + ref_result = ref_runner.run(args.model_path, ref_dir) + if not ref_result.success: + raise RuntimeError(f"Reference run failed: {ref_result.error_message}") + + target_result = target_runner.run(args.model_path, target_dir) + if not target_result.success: + raise RuntimeError(f"Target run failed: {target_result.error_message}") - # compare_perf_diff - # A - ref_dump_path = utils.get_output_path(ref_dir, args.model_path) - ref_out = torch.load(str(ref_dump_path)) + compare_results(ref_result, target_result, ref_runner_config) - ref_log_path = utils.get_log_path(ref_dir, args.model_path) - ref_time_stats = parse_time_stats_from_reference_log(ref_log_path) - # B - target_dump_path = utils.get_output_path(target_dir, args.model_path) - target_out = torch.load(str(target_dump_path)) +def compare_results( + ref_result: RunResult, target_result: RunResult, config: RunnerConfig +): + """Compare outputs and performance between reference and target results.""" + if ref_result.outputs is None or target_result.outputs is None: + print("[Warning] Cannot compare: missing outputs", file=sys.stderr) + return - target_log_path = utils.get_log_path(target_dir, args.model_path) - target_time_stats = parse_time_stats_from_reference_log(target_log_path) + dummy_args = types.SimpleNamespace( + log_prompt=config.execution.log_prompt, + compiler=config.execution.compiler, + device=config.execution.device, + ) - compare_correctness(ref_out, target_out, ref_args) + compare_correctness(ref_result.outputs, target_result.outputs, dummy_args) test_compiler_util.print_times_and_speedup( - ref_args, ref_time_stats, target_time_stats + dummy_args, ref_result.time_stats, target_result.time_stats ) diff --git a/graph_net_bench/torch/runner/__init__.py b/graph_net_bench/torch/runner/__init__.py new file mode 100644 index 000000000..643f28f91 --- /dev/null +++ b/graph_net_bench/torch/runner/__init__.py @@ -0,0 +1,14 @@ +from .base_runner import BaseRunner, RunResult, RunnerConfig, create_runner +from .local_runner import LocalRunner +from .process_runner import ProcessRunner +from .remote_runner import RemoteRunner + +__all__ = [ + "BaseRunner", + "RunResult", + "RunnerConfig", + "LocalRunner", + "ProcessRunner", + "RemoteRunner", + "create_runner", +] diff --git a/graph_net_bench/torch/runner/base_runner.py b/graph_net_bench/torch/runner/base_runner.py new file mode 100644 index 
000000000..25d0882c8 --- /dev/null +++ b/graph_net_bench/torch/runner/base_runner.py @@ -0,0 +1,149 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any, Dict, Optional, Tuple +from pathlib import Path +from enum import Enum + + +class RunnerType(Enum): + LOCAL = "local" + PROCESS = "process" + REMOTE = "remote" + + +@dataclass +class ExecutionConfig: + """Configuration specific to model execution.""" + + compiler: str = "inductor" + device: str = "cuda" + op_lib: str = "default" + warmup: int = 5 + trials: int = 10 + seed: int = 123 + log_prompt: str = "graph-net-runner-log" + backend_config: Optional[str] = None + + def to_dict(self) -> Dict[str, Any]: + return { + k: v + for k, v in self.__dict__.items() + if v is not None and not k.startswith("_") + } + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> "ExecutionConfig": + return cls(**{k: v for k, v in d.items() if hasattr(cls, k)}) + + +@dataclass +class RunnerStrategyConfig: + """Configuration for runner strategy selection.""" + + runner_type: RunnerType = RunnerType.LOCAL + remote_machine: str = "localhost" + remote_port: int = 50052 + subprocess_timeout: int = 600 + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> "RunnerStrategyConfig": + runner_type_str = d.get("runner_type", "local") + try: + runner_type = RunnerType(runner_type_str.lower()) + except ValueError: + runner_type = RunnerType.LOCAL + + return cls( + runner_type=runner_type, + remote_machine=d.get("machine", "localhost"), + remote_port=d.get("port", 50052), + subprocess_timeout=d.get("subprocess_timeout", 600), + ) + + +@dataclass +class RunnerConfig: + """Unified configuration combining execution and strategy configs.""" + + execution: ExecutionConfig + strategy: RunnerStrategyConfig + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> "RunnerConfig": + execution_config = ExecutionConfig.from_dict(d) + strategy_config = RunnerStrategyConfig.from_dict(d) + return cls(execution=execution_config, strategy=strategy_config) + + def to_dict(self) -> Dict[str, Any]: + return { + **self.execution.to_dict(), + "runner_type": self.strategy.runner_type.value, + "machine": self.strategy.remote_machine, + "port": self.strategy.remote_port, + "subprocess_timeout": self.strategy.subprocess_timeout, + } + + +@dataclass +class RunResult: + """Result of a single backend run.""" + + success: bool = False + outputs: Optional[Tuple[Any, ...]] = None + time_stats: Dict[str, Any] = field(default_factory=dict) + log_content: str = "" + error_message: str = "" + + output_path: Optional[Path] = None + log_path: Optional[Path] = None + + +class BaseRunner(ABC): + """Abstract base class for model execution runners.""" + + def __init__(self, config: RunnerConfig): + self.config = config + + @abstractmethod + def run(self, model_path: str, output_dir: str) -> RunResult: + """ + Execute model evaluation and return results. + + Args: + model_path: Path to model directory (containing model.py, graph_net.json, etc.) 
+ output_dir: Directory to store outputs and logs + + Returns: + RunResult containing outputs, timing stats, and logs + """ + pass + + def _get_output_path(self, output_dir: str, model_path: str) -> Path: + from graph_net_bench.torch import utils + + return Path(utils.get_output_path(output_dir, model_path)) + + def _get_log_path(self, output_dir: str, model_path: str) -> Path: + from graph_net_bench.torch import utils + + return Path(utils.get_log_path(output_dir, model_path)) + + +def create_runner(config: RunnerConfig) -> BaseRunner: + """Factory function to create appropriate runner based on config.""" + runner_type = config.strategy.runner_type + + if runner_type == RunnerType.LOCAL: + from .local_runner import LocalRunner + + return LocalRunner(config) + elif runner_type == RunnerType.PROCESS: + from .process_runner import ProcessRunner + + return ProcessRunner(config) + elif runner_type == RunnerType.REMOTE: + from .remote_runner import RemoteRunner + + return RemoteRunner(config) + else: + raise ValueError(f"Unknown runner_type: {runner_type}") diff --git a/graph_net_bench/torch/runner/local_runner.py b/graph_net_bench/torch/runner/local_runner.py new file mode 100644 index 000000000..3d07a6470 --- /dev/null +++ b/graph_net_bench/torch/runner/local_runner.py @@ -0,0 +1,99 @@ +import os +import sys +import json +import types +import traceback +from io import StringIO +from contextlib import redirect_stdout, redirect_stderr + +import torch + +from .base_runner import BaseRunner, RunResult + + +class LocalRunner(BaseRunner): + """Execute model evaluation in the current process.""" + + def run(self, model_path: str, output_dir: str) -> RunResult: + from graph_net_bench.torch import eval_backend_perf + + os.makedirs(output_dir, exist_ok=True) + + log_path = self._get_log_path(output_dir, model_path) + output_path = self._get_output_path(output_dir, model_path) + + eval_args = types.SimpleNamespace( + model_path=model_path, + output_path=output_dir, + seed=self.config.execution.seed, + compiler=self.config.execution.compiler, + device=self.config.execution.device, + op_lib=self.config.execution.op_lib, + warmup=self.config.execution.warmup, + trials=self.config.execution.trials, + log_prompt=self.config.execution.log_prompt, + backend_config=self.config.execution.backend_config, + ) + + log_buffer = StringIO() + result = RunResult( + output_path=output_path, + log_path=log_path, + ) + + try: + eval_backend_perf.register_op_lib(self.config.execution.op_lib) + eval_backend_perf.set_seed(self.config.execution.seed) + + with redirect_stdout(log_buffer), redirect_stderr(log_buffer): + self._run_evaluation(eval_args, result) + + except Exception as e: + result.success = False + result.error_message = f"{str(e)}\n{traceback.format_exc()}" + log_buffer.write(f"\n[ERROR] {result.error_message}\n") + + result.log_content = log_buffer.getvalue() + + with open(log_path, "w", encoding="utf-8") as f: + f.write(result.log_content) + + print(result.log_content, file=sys.stderr, flush=True) + + return result + + def _run_evaluation(self, args: types.SimpleNamespace, result: RunResult): + from graph_net_bench.torch import eval_backend_perf + from graph_net_bench import test_compiler_util + + compiler = eval_backend_perf.get_compiler_backend(args) + input_dict = eval_backend_perf.get_input_dict(args) + model = eval_backend_perf.get_model(args) + model.eval() + + test_compiler_util.print_config( + args, + eval_backend_perf.get_hardward_name(args.device), + 
eval_backend_perf.get_compiler_version(args.compiler), + ) + + compiled_model = compiler(model) + + def model_call(): + return compiled_model(**input_dict) + + outputs, time_stats = eval_backend_perf.measure_performance( + model_call, args, compiler + ) + + result.success = True + result.outputs = outputs + result.time_stats = time_stats + + if result.output_path: + torch.save(outputs, str(result.output_path)) + + test_compiler_util.print_running_status(args, True) + test_compiler_util.print_with_log_prompt( + "[Performance][eager]:", json.dumps(time_stats), args.log_prompt + ) diff --git a/graph_net_bench/torch/runner/process_runner.py b/graph_net_bench/torch/runner/process_runner.py new file mode 100644 index 000000000..9ac68607e --- /dev/null +++ b/graph_net_bench/torch/runner/process_runner.py @@ -0,0 +1,102 @@ +import os +import sys +import subprocess +from pathlib import Path + +import torch + +from .base_runner import BaseRunner, RunResult + + +class ProcessRunner(BaseRunner): + """Execute model evaluation in a separate subprocess on the local machine.""" + + def run(self, model_path: str, output_dir: str) -> RunResult: + os.makedirs(output_dir, exist_ok=True) + + log_path = self._get_log_path(output_dir, model_path) + output_path = self._get_output_path(output_dir, model_path) + + result = RunResult( + output_path=output_path, + log_path=log_path, + ) + + cmd = self._build_command(model_path, output_dir) + print(f"[ProcessRunner] Executing: {cmd}", file=sys.stderr, flush=True) + + try: + env = os.environ.copy() + repo_root = Path(__file__).resolve().parents[3] + env["PYTHONPATH"] = f"{repo_root}:{env.get('PYTHONPATH', '')}" + + proc = subprocess.run( + cmd, + shell=True, + env=env, + capture_output=True, + text=True, + timeout=self.config.strategy.subprocess_timeout, + ) + + result.log_content = proc.stderr or "" + + if proc.returncode != 0: + result.success = False + result.error_message = ( + f"Process exited with code {proc.returncode}\n" + f"stdout: {proc.stdout}\n" + f"stderr: {proc.stderr}" + ) + else: + result.success = True + self._parse_result(result, output_dir, model_path) + + except subprocess.TimeoutExpired as e: + result.success = False + result.error_message = f"Process timed out: {e}" + except Exception as e: + result.success = False + result.error_message = f"Process execution failed: {e}" + + print(result.log_content, file=sys.stderr, flush=True) + return result + + def _build_command(self, model_path: str, output_dir: str) -> str: + cmd_parts = [ + sys.executable, + "-m", + "graph_net_bench.torch.eval_backend_perf", + "--model-path", + model_path, + "--output-path", + output_dir, + ] + + config_dict = self.config.to_dict() + from graph_net_bench import test_compiler_util + + config_str = test_compiler_util.convert_to_base64(config_dict) + cmd_parts.extend(["--config", config_str]) + + return " ".join(cmd_parts) + + def _parse_result(self, result: RunResult, output_dir: str, model_path: str): + from graph_net_bench import test_compiler_util + + if result.output_path and result.output_path.exists(): + try: + result.outputs = torch.load(str(result.output_path)) + except Exception as e: + result.error_message += f"\nFailed to load outputs: {e}" + + if result.log_path and result.log_path.exists(): + try: + result.log_content = test_compiler_util.extract_log_content( + str(result.log_path) + ) + result.time_stats = test_compiler_util.parse_performance_stats( + str(result.log_path) + ) + except Exception as e: + result.error_message += f"\nFailed to parse log: {e}" diff 
--git a/graph_net_bench/torch/runner/remote_runner.py b/graph_net_bench/torch/runner/remote_runner.py new file mode 100644 index 000000000..82f580db5 --- /dev/null +++ b/graph_net_bench/torch/runner/remote_runner.py @@ -0,0 +1,135 @@ +import os +import sys +from typing import Dict + +import torch + +from .base_runner import BaseRunner, RunResult + + +class RemoteRunner(BaseRunner): + """Execute model evaluation on a remote machine via gRPC.""" + + def run(self, model_path: str, output_dir: str) -> RunResult: + from graph_net_rpc.sample_remote_executor import SampleRemoteExecutor + + os.makedirs(output_dir, exist_ok=True) + + log_path = self._get_log_path(output_dir, model_path) + output_path = self._get_output_path(output_dir, model_path) + + result = RunResult( + output_path=output_path, + log_path=log_path, + ) + + rpc_cmd = self._build_rpc_command() + executor = SampleRemoteExecutor( + machine=self.config.strategy.remote_machine, + port=self.config.strategy.remote_port, + ) + + try: + print( + f"[RemoteRunner] Sending to {self.config.machine}:{self.config.port}", + file=sys.stderr, + flush=True, + ) + print(f"[RemoteRunner] rpc_cmd: {rpc_cmd}", file=sys.stderr, flush=True) + + files_dict = executor.execute(model_path, rpc_cmd) + self._process_remote_output(result, files_dict, output_dir, model_path) + result.success = True + + except Exception as e: + import traceback + + result.success = False + result.error_message = ( + f"Remote execution failed: {e}\n{traceback.format_exc()}" + ) + print(result.error_message, file=sys.stderr, flush=True) + + finally: + executor.close() + + return result + + def _build_rpc_command(self) -> str: + cmd = "python3 -m graph_net.torch.test_reference_device" + cmd += ' --model-path "$INPUT_WORKSPACE"' + cmd += ' --reference-dir "$OUTPUT_WORKSPACE"' + cmd += f" --compiler {self.config.execution.compiler}" + cmd += f" --device {self.config.execution.device}" + cmd += f" --op-lib {self.config.execution.op_lib}" + cmd += f" --warmup {self.config.execution.warmup}" + cmd += f" --trials {self.config.execution.trials}" + cmd += f" --seed {self.config.execution.seed}" + + if self.config.execution.log_prompt: + cmd += f" --log-prompt {self.config.execution.log_prompt}" + if self.config.execution.backend_config: + cmd += f" --config {self.config.execution.backend_config}" + + return cmd + + def _process_remote_output( + self, + result: RunResult, + files_dict: Dict[str, bytes], + output_dir: str, + model_path: str, + ): + from graph_net_bench import test_compiler_util + + log_filename = result.log_path.name if result.log_path else None + pth_filename = result.output_path.name if result.output_path else None + + available_logs = sorted([k for k in files_dict.keys() if k.endswith(".log")]) + available_pths = sorted([k for k in files_dict.keys() if k.endswith(".pth")]) + + if log_filename not in files_dict and len(available_logs) == 1: + log_filename = available_logs[0] + if pth_filename not in files_dict and len(available_pths) == 1: + pth_filename = available_pths[0] + + if log_filename and log_filename in files_dict: + log_bytes = files_dict[log_filename] + if result.log_path: + with open(result.log_path, "wb") as f: + f.write(log_bytes) + try: + result.log_content = log_bytes.decode("utf-8") + print(result.log_content, file=sys.stderr, flush=True) + except Exception: + result.log_content = f"[Binary log, {len(log_bytes)} bytes]" + # Write binary content as text for parsing + with open(result.log_path, "wb") as f: + f.write(log_bytes) + + try: + result.time_stats = 
test_compiler_util.parse_performance_stats( + str(result.log_path) + ) + except Exception as e: + print(f"Warning: Failed to parse time stats: {e}", file=sys.stderr) + else: + print( + f"Warning: log not found. expected={log_filename}, available={available_logs}", + file=sys.stderr, + ) + + if pth_filename and pth_filename in files_dict: + pth_bytes = files_dict[pth_filename] + if result.output_path: + with open(result.output_path, "wb") as f: + f.write(pth_bytes) + try: + result.outputs = torch.load(str(result.output_path)) + except Exception as e: + print(f"Warning: Failed to load outputs: {e}", file=sys.stderr) + else: + print( + f"Warning: output not found. expected={pth_filename}, available={available_pths}", + file=sys.stderr, + ) diff --git a/test/eval_device_diff_test.sh b/test/eval_device_diff_test.sh new file mode 100755 index 000000000..6840b53a7 --- /dev/null +++ b/test/eval_device_diff_test.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +AI4C_ROOT=$(python3 -c "import graph_net_bench; import os; print(os.path.dirname(os.path.dirname(graph_net_bench.__file__)))") +OUTPUT_PATH=/tmp/workspace_eval_device_diff_test + +mkdir -p "$OUTPUT_PATH" +model_list="$AI4C_ROOT/test/workspace_eval_backend_diff/sample_list.txt" + +# Default remote server settings (can be overridden by environment variables) +REMOTE_MACHINE="${REMOTE_MACHINE:-localhost}" +REMOTE_PORT="${REMOTE_PORT:-50052}" + +python3 -m graph_net_bench.torch.eval_backend_diff \ + --model-path-list $model_list \ + --reference-config $(base64 -w 0 <&1 | tee "$OUTPUT_PATH/validation.log" \ No newline at end of file From 1250435c79cf4ec7322145ca63df9dbe50381c00 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Mon, 26 Jan 2026 13:30:09 +0800 Subject: [PATCH 19/20] minor fix --- graph_net_bench/torch/runner/remote_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graph_net_bench/torch/runner/remote_runner.py b/graph_net_bench/torch/runner/remote_runner.py index 82f580db5..74c5e651f 100644 --- a/graph_net_bench/torch/runner/remote_runner.py +++ b/graph_net_bench/torch/runner/remote_runner.py @@ -31,7 +31,7 @@ def run(self, model_path: str, output_dir: str) -> RunResult: try: print( - f"[RemoteRunner] Sending to {self.config.machine}:{self.config.port}", + f"[RemoteRunner] Sending to {self.config.strategy.remote_machine}:{self.config.strategy.remote_port}", file=sys.stderr, flush=True, ) From c92f93133977ad5e8651971bcc8e780c7774e60b Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Mon, 26 Jan 2026 14:43:54 +0800 Subject: [PATCH 20/20] Optimize code style --- graph_net/paddle/test_compiler.py | 6 +- graph_net/paddle/test_reference_device.py | 2 +- graph_net/paddle/test_target_device.py | 2 +- graph_net_bench/torch/eval_backend_diff.py | 413 ++++++++------ graph_net_bench/torch/eval_backend_perf.py | 508 +++++++++++------- graph_net_bench/torch/runner/base_runner.py | 25 +- graph_net_bench/torch/runner/local_runner.py | 127 +++-- .../torch/runner/process_runner.py | 143 +++-- graph_net_bench/torch/runner/remote_runner.py | 222 +++++--- graph_net_bench/torch/test_compiler.py | 6 +- 10 files changed, 906 insertions(+), 548 deletions(-) diff --git a/graph_net/paddle/test_compiler.py b/graph_net/paddle/test_compiler.py index 76c3d5610..8beea67fb 100644 --- a/graph_net/paddle/test_compiler.py +++ b/graph_net/paddle/test_compiler.py @@ -43,7 +43,7 @@ def init_env(args): paddle.set_flags({"FLAGS_cudnn_exhaustive_search": 1}) -def get_hardward_name(args): +def get_hardware_name(args): hardware = 
"unknown" if test_compiler_util.is_gpu_device(args.device): hardware = paddle.device.cuda.get_device_name(0) @@ -149,7 +149,7 @@ def measure_performance(model_call, args, compiler, profile=False): min_trials = int(100 / np.mean(warmup_e2e_times[1:])) trials = max(args.trials, min_trials) - hardware_name = get_hardward_name(args) + hardware_name = get_hardware_name(args) print( f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {trials}", file=sys.stderr, @@ -327,7 +327,7 @@ def test_single_model(args): model.eval() test_compiler_util.print_basic_config( - args, get_hardward_name(args), get_compile_framework_version(args) + args, get_hardware_name(args), get_compile_framework_version(args) ) # Run on eager mode diff --git a/graph_net/paddle/test_reference_device.py b/graph_net/paddle/test_reference_device.py index f1db9bc0f..4c7c60b5b 100644 --- a/graph_net/paddle/test_reference_device.py +++ b/graph_net/paddle/test_reference_device.py @@ -49,7 +49,7 @@ def test_single_model(args): test_compiler_util.print_basic_config( args, - test_compiler.get_hardward_name(args), + test_compiler.get_hardware_name(args), test_compiler.get_compile_framework_version(args), ) diff --git a/graph_net/paddle/test_target_device.py b/graph_net/paddle/test_target_device.py index 9697aea5d..08176680d 100644 --- a/graph_net/paddle/test_target_device.py +++ b/graph_net/paddle/test_target_device.py @@ -89,7 +89,7 @@ def test_single_model(args): test_compiler_util.print_basic_config( args, - test_compiler.get_hardward_name(args), + test_compiler.get_hardware_name(args), test_compiler.get_compile_framework_version(args), ) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index 68e1f2f02..ce9f27b2f 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -1,220 +1,320 @@ +"""Backend Performance Difference Evaluation Script. + +Compares outputs and performance between reference and target compiler backends. +""" + import argparse -import torch -import sys import os -import os.path +import sys import traceback import types -from graph_net_bench import test_compiler_util +from typing import Any, List, Optional, Tuple + +import torch + from graph_net_bench import path_utils +from graph_net_bench import test_compiler_util from .runner import RunnerConfig, RunResult, create_runner +_DEFAULT_REF_DIR = "/tmp/eval_perf_diff/reference" +_DEFAULT_TARGET_DIR = "/tmp/eval_perf_diff/target" + + +def _get_dtype_name(value: Any) -> str: + """Extract dtype name from tensor or type name from other objects.""" + if isinstance(value, torch.Tensor): + return str(value.dtype).replace("torch.", "") + return type(value).__name__ + + +def _extract_dtypes(outputs: List[Any]) -> List[str]: + """Extract dtype/type names from a list of outputs.""" + return [_get_dtype_name(x) for x in outputs] -def compare_correctness(expected_out, compiled_out, args): - eager_dtypes = [ - ( - str(x.dtype).replace("torch.", "") - if isinstance(x, torch.Tensor) - else type(x).__name__ - ) - for x in expected_out - ] - compiled_dtypes = [ - ( - str(x.dtype).replace("torch.", "") - if isinstance(x, torch.Tensor) - else type(x).__name__ - ) - for x in compiled_out - ] - # datatype check +def compare_correctness( + expected_out: List[torch.Tensor], + compiled_out: List[torch.Tensor], + args, +) -> None: + """Compare correctness between expected and compiled outputs. + + Args: + expected_out: List of expected output tensors. 
+ compiled_out: List of compiled output tensors. + args: Arguments containing log_prompt and other settings. + """ + eager_dtypes = _extract_dtypes(expected_out) + compiled_dtypes = _extract_dtypes(compiled_out) + type_match = test_compiler_util.check_output_datatype( args, eager_dtypes, compiled_dtypes ) + if not type_match: + return - if type_match: - test_compiler_util.check_equal( - args, - expected_out, - compiled_out, - cmp_equal_func=get_cmp_equal, - ) - - test_compiler_util.check_allclose( - args, - expected_out, - compiled_out, - cmp_all_close_func=get_cmp_all_close, - cmp_max_diff_func=get_cmp_max_diff, - cmp_mean_diff_func=get_cmp_mean_diff, - ) + test_compiler_util.check_equal( + args, + expected_out, + compiled_out, + cmp_equal_func=get_cmp_equal, + ) + test_compiler_util.check_allclose( + args, + expected_out, + compiled_out, + cmp_all_close_func=get_cmp_all_close, + cmp_max_diff_func=get_cmp_max_diff, + cmp_mean_diff_func=get_cmp_mean_diff, + ) -def get_cmp_equal(expected_out, compiled_out): +def get_cmp_equal( + expected_out: List[torch.Tensor], compiled_out: List[torch.Tensor] +) -> str: + """Get space-separated string of equality check results (1=equal, 0=not).""" return " ".join( str(int(torch.equal(a, b))) for a, b in zip(expected_out, compiled_out) ) -def get_cmp_all_close(expected_out, compiled_out, atol, rtol): +def get_cmp_all_close( + expected_out: List[torch.Tensor], + compiled_out: List[torch.Tensor], + atol: float, + rtol: float, +) -> str: + """Get space-separated string of allclose check results.""" return " ".join( str(int(torch.allclose(a, b, atol=atol, rtol=rtol))) for a, b in zip(compiled_out, expected_out) ) -def get_cmp_max_diff(expected_out, compiled_out): +def _compute_abs_diff(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: + """Compute absolute difference, converting to float for LongTensor compatibility.""" + return torch.abs(a.float() - b.float()) + + +def get_cmp_max_diff( + expected_out: List[torch.Tensor], compiled_out: List[torch.Tensor] +) -> str: + """Get space-separated string of max absolute differences.""" + return " ".join( + str(torch.max(_compute_abs_diff(a, b)).item()) + for a, b in zip(expected_out, compiled_out) + ) + + +def get_cmp_mean_diff( + expected_out: List[torch.Tensor], compiled_out: List[torch.Tensor] +) -> str: + """Get space-separated string of mean absolute differences.""" return " ".join( - # Transform to float to handle LongTensor output of some models, which cannnot be processed with torch.max(). 
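The comparison helpers above each return one space-separated value per output tensor. A toy illustration, not part of the patch, of that format:

# Toy illustration only.
import torch
from graph_net_bench.torch.eval_backend_diff import get_cmp_equal, get_cmp_max_diff

ref = [torch.tensor([1.0, 2.0]), torch.tensor([3, 4])]
tgt = [torch.tensor([1.0, 2.5]), torch.tensor([3, 4])]
print(get_cmp_equal(ref, tgt))     # "0 1"   (second output is bit-exact)
print(get_cmp_max_diff(ref, tgt))  # "0.5 0.0"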
- str(torch.max(torch.abs(a.float() - b.float())).item()) + str(torch.mean(_compute_abs_diff(a, b)).item()) for a, b in zip(expected_out, compiled_out) ) -def get_cmp_mean_diff(expected_out, compiled_out): +def _count_diff_elements( + a: torch.Tensor, b: torch.Tensor, atol: float, rtol: float +) -> int: + """Count number of differing elements between two tensors.""" + if a.is_floating_point() and b.is_floating_point(): + return torch.sum(~torch.isclose(a, b, atol=atol, rtol=rtol)).item() + return torch.sum(a != b).item() + + +def get_cmp_diff_count( + expected_out: List[torch.Tensor], + compiled_out: List[torch.Tensor], + atol: float, + rtol: float, +) -> str: + """Get space-separated string of element difference counts.""" return " ".join( - # To handle LongTensor - str(torch.mean(torch.abs(a.float() - b.float())).item()) + str(_count_diff_elements(a, b, atol, rtol)) for a, b in zip(expected_out, compiled_out) ) -def get_cmp_diff_count(expected_out, compiled_out, atol, rtol): - results = [] - for a, b in zip(expected_out, compiled_out): - # To handle LongTensor - if a.is_floating_point() and b.is_floating_point(): - diff_count = torch.sum(~torch.isclose(a, b, atol=atol, rtol=rtol)).item() - else: - diff_count = torch.sum(a != b).item() - results.append(str(diff_count)) - return " ".join(results) +def _has_model_file(path: str) -> bool: + """Check if directory contains model.py.""" + return os.path.exists(os.path.join(path, "model.py")) -def _get_model_paths(args, model_path_prefix, use_model_list): +def _get_model_paths_from_list( + model_path_list: str, model_path_prefix: str +) -> List[str]: + """Get model paths from a list file with prefix.""" + assert os.path.isdir(model_path_prefix), f"Not a directory: {model_path_prefix}" + assert os.path.isfile(model_path_list), f"Not a file: {model_path_list}" + + test_samples = test_compiler_util.get_allow_samples( + model_path_list, model_path_prefix + ) + return [ + os.path.join(model_path_prefix, rel_path) + for rel_path in test_samples + if _has_model_file(os.path.join(model_path_prefix, rel_path)) + ] + + +def _get_model_paths_from_dir( + model_path: str, model_path_list: Optional[str], model_path_prefix: Optional[str] +) -> List[str]: + """Get model paths by recursively scanning a directory.""" + assert os.path.isdir(model_path), f"Not a directory: {model_path}" + + test_samples = test_compiler_util.get_allow_samples( + model_path_list, model_path_prefix + ) + all_paths = path_utils.get_recursively_model_path(model_path) + + if test_samples is None: + return list(all_paths) + return [p for p in all_paths if os.path.abspath(p) in test_samples] + + +def _get_model_paths( + args, model_path_prefix: Optional[str], use_model_list: bool +) -> List[str]: + """Get list of model paths based on configuration.""" if use_model_list: - assert os.path.isdir(model_path_prefix) and os.path.isfile(args.model_path_list) + return _get_model_paths_from_list(args.model_path_list, model_path_prefix) + return _get_model_paths_from_dir( + args.model_path, args.model_path_list, model_path_prefix + ) - test_samples = test_compiler_util.get_allow_samples( - args.model_path_list, model_path_prefix - ) - model_paths = [ - os.path.join(model_path_prefix, rel_model_path) - for rel_model_path in test_samples - if os.path.exists( - os.path.join(model_path_prefix, rel_model_path, "model.py") - ) - ] - else: - assert os.path.isdir(args.model_path) - - test_samples = test_compiler_util.get_allow_samples( - args.model_path_list, model_path_prefix - ) - model_paths = [ - 
model_path - for model_path in path_utils.get_recursively_model_path(args.model_path) - if test_samples is None or os.path.abspath(model_path) in test_samples - ] - return model_paths +def _create_model_args( + model_path: str, reference_config: str, target_config: str +) -> argparse.Namespace: + """Create namespace for single model evaluation.""" + return argparse.Namespace( + model_path=model_path, + model_path_list=None, + reference_config=reference_config, + target_config=target_config, + ) + +def _eval_single_model_safe(model_args: argparse.Namespace) -> bool: + """Evaluate single model with exception handling. -def _create_model_args(model_path, reference_config, target_config): - args = argparse.Namespace() - args.model_path = model_path - args.model_path_list = None - args.reference_config = reference_config - args.target_config = target_config - return args + Returns: + True if evaluation succeeded, False otherwise. + """ + try: + eval_single_model(model_args) + return True + except KeyboardInterrupt: + print("KeyboardInterrupt") + sys.exit(1) + except Exception: + print("\n--- Full Traceback ---") + traceback.print_exc() + return False + + +def _print_evaluation_summary(total_count: int, failed_samples: List[str]) -> None: + """Print summary of multi-model evaluation.""" + print( + f"Totally {total_count} verified samples, failed {len(failed_samples)} samples.", + file=sys.stderr, + flush=True, + ) + for model_path in failed_samples: + print(f"- {model_path}", file=sys.stderr, flush=True) -def eval_multi_models(args, model_path_prefix=None, use_model_list=False): +def eval_multi_models( + args, + model_path_prefix: Optional[str] = None, + use_model_list: bool = False, +) -> None: + """Evaluate multiple models and collect results.""" module_name = os.path.splitext(os.path.basename(__file__))[0] - model_paths = _get_model_paths(args, model_path_prefix, use_model_list) - failed_samples = [] + failed_samples: List[str] = [] + for sample_idx, model_path in enumerate(model_paths): print( f"[{sample_idx}] {module_name}, model_path: {model_path}", file=sys.stderr, flush=True, ) - - model_args = argparse.Namespace() - model_args.model_path = model_path - model_args.model_path_list = None - model_args.reference_config = args.reference_config - model_args.target_config = args.target_config - - try: - eval_single_model(model_args) - success = True - except KeyboardInterrupt: - print("KeyboardInterrupt") - sys.exit(1) - except Exception: - print("\n--- Full Traceback ---") - traceback.print_exc() - success = False - + model_args = _create_model_args( + model_path, args.reference_config, args.target_config + ) + success = _eval_single_model_safe(model_args) if not success: failed_samples.append(model_path) - print( - f"Totally {len(model_paths)} verified samples, failed {len(failed_samples)} samples.", - file=sys.stderr, - flush=True, + _print_evaluation_summary(len(model_paths), failed_samples) + + +def _parse_runner_configs(args) -> Tuple[RunnerConfig, RunnerConfig]: + """Parse reference and target runner configurations.""" + return ( + RunnerConfig.from_dict( + test_compiler_util.convert_to_dict(args.reference_config) + ), + RunnerConfig.from_dict(test_compiler_util.convert_to_dict(args.target_config)), ) - if failed_samples: - for model_path in failed_samples: - print(f"- {model_path}", file=sys.stderr, flush=True) -def eval_single_model(args): - """ - Unified evaluation using Runner abstraction. - Supports local, process, and remote execution via runner_type in config. 
- """ - ref_dir = "/tmp/eval_perf_diff/reference" - target_dir = "/tmp/eval_perf_diff/target" +def _log_runner_info(ref_config: RunnerConfig, target_config: RunnerConfig) -> None: + """Log runner type information.""" + for label, cfg in [("Reference", ref_config), ("Target", target_config)]: + print( + f"[eval_backend_diff] {label} runner: {cfg.strategy.runner_type.value}", + file=sys.stderr, + flush=True, + ) + + +def _run_and_validate( + runner, model_path: str, output_dir: str, label: str +) -> RunResult: + """Run model and validate result.""" + result = runner.run(model_path, output_dir) + if not result.success: + raise RuntimeError(f"{label} run failed: {result.error_message}") + return result - ref_config_dict = test_compiler_util.convert_to_dict(args.reference_config) - target_config_dict = test_compiler_util.convert_to_dict(args.target_config) - ref_runner_config = RunnerConfig.from_dict(ref_config_dict) - target_runner_config = RunnerConfig.from_dict(target_config_dict) +def eval_single_model(args) -> None: + """Evaluate single model using Runner abstraction. + + Supports local, process, and remote execution via runner_type in config. + """ + ref_runner_config, target_runner_config = _parse_runner_configs(args) + _log_runner_info(ref_runner_config, target_runner_config) ref_runner = create_runner(ref_runner_config) target_runner = create_runner(target_runner_config) - print( - f"[eval_backend_diff] Reference runner: {ref_runner_config.strategy.runner_type.value}", - file=sys.stderr, - flush=True, + ref_result = _run_and_validate( + ref_runner, args.model_path, _DEFAULT_REF_DIR, "Reference" ) - print( - f"[eval_backend_diff] Target runner: {target_runner_config.strategy.runner_type.value}", - file=sys.stderr, - flush=True, + target_result = _run_and_validate( + target_runner, args.model_path, _DEFAULT_TARGET_DIR, "Target" ) - ref_result = ref_runner.run(args.model_path, ref_dir) - if not ref_result.success: - raise RuntimeError(f"Reference run failed: {ref_result.error_message}") - - target_result = target_runner.run(args.model_path, target_dir) - if not target_result.success: - raise RuntimeError(f"Target run failed: {target_result.error_message}") - compare_results(ref_result, target_result, ref_runner_config) def compare_results( ref_result: RunResult, target_result: RunResult, config: RunnerConfig -): - """Compare outputs and performance between reference and target results.""" +) -> None: + """Compare outputs and performance between reference and target results. + + Args: + ref_result: Result from reference runner. + target_result: Result from target runner. + config: Runner configuration for logging settings. + """ if ref_result.outputs is None or target_result.outputs is None: print("[Warning] Cannot compare: missing outputs", file=sys.stderr) return @@ -231,20 +331,31 @@ def compare_results( ) -def main(args): +def main(args: argparse.Namespace) -> None: + """Main entry point for backend difference evaluation. + + Args: + args: Parsed command-line arguments. + + Raises: + ValueError: If model_path is invalid. 
+ """ ref_config = test_compiler_util.convert_to_dict(args.reference_config) model_path_prefix = ref_config.get("model_path_prefix") if args.model_path_list and model_path_prefix: eval_multi_models(args, model_path_prefix, use_model_list=True) - elif os.path.isdir(args.model_path): - if path_utils.is_single_model_dir(args.model_path): - eval_single_model(args) - else: - eval_multi_models(args, model_path_prefix, use_model_list=False) - else: + return + + if not os.path.isdir(args.model_path): raise ValueError(f"Invalid model path: {args.model_path}") + if path_utils.is_single_model_dir(args.model_path): + eval_single_model(args) + return + + eval_multi_models(args, model_path_prefix, use_model_list=False) + if __name__ == "__main__": parser = argparse.ArgumentParser( diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index 5c8586f30..30bf2dacb 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -1,142 +1,288 @@ -from . import utils +"""Single Backend Performance Evaluation Script.""" + import argparse import importlib.util -import torch -from pathlib import Path -from typing import Type -import sys -import os -import traceback import json -import random -import numpy as np +import os import platform +import random +import sys +import traceback import types from contextlib import redirect_stdout, redirect_stderr -from graph_net_bench.torch.backend.graph_compiler_backend import GraphCompilerBackend +from pathlib import Path +from typing import Callable, Dict, Any, List, Tuple, Type, Optional + +import numpy as np +import torch + +from . import utils from graph_net_bench import test_compiler_util +from graph_net_bench.torch.backend.graph_compiler_backend import GraphCompilerBackend +_ARG_DEFAULTS: Dict[str, Any] = { + "model_path": None, + "output_path": None, + "seed": 123, + "compiler": "inductor", + "device": "cuda", + "op_lib": None, + "warmup": 3, + "trials": 5, + "log_prompt": "graph-net-bench-log", + "model_path_prefix": None, + "backend_config": None, +} -def register_op_lib(op_lib): - if op_lib == "flaggems": - import flag_gems - flag_gems.enable() - else: - pass +def register_op_lib(op_lib: Optional[str]) -> None: + """Register operator library if specified.""" + if op_lib != "flaggems": + return + import flag_gems + flag_gems.enable() -def set_seed(random_seed): + +def set_seed(random_seed: int) -> None: + """Set random seed for reproducibility across all frameworks.""" random.seed(random_seed) np.random.seed(random_seed) torch.manual_seed(random_seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed(random_seed) - torch.cuda.manual_seed_all(random_seed) + if not torch.cuda.is_available(): + return + torch.cuda.manual_seed(random_seed) + torch.cuda.manual_seed_all(random_seed) -def get_hardward_name(device): - hardware_name = "unknown" +def get_hardware_name(device: str) -> str: + """Get hardware name based on device type.""" if "cuda" in device: - hardware_name = torch.cuda.get_device_name(device) - elif device == "cpu": - hardware_name = platform.processor() - return hardware_name + return torch.cuda.get_device_name(device) + if device == "cpu": + return platform.processor() + return "unknown" + + +def get_compiler_version(compiler_name: str) -> str: + """Get version string for the given compiler. + Args: + compiler_name: Name of the compiler (e.g., 'inductor', 'tvm'). 
-def get_compiler_version(compiler): - if compiler in ["inductor", "nope", "unstable_to_stable"]: + Returns: + Version string or 'unknown' if not determinable. + """ + torch_based_compilers = {"inductor", "nope", "unstable_to_stable"} + if compiler_name in torch_based_compilers: return torch.__version__ - elif compiler in ["tvm", "xla", "tensorrt", "bladedisc"]: - # Assuming compiler object has a version attribute - return f"{compiler.capitalize()} {compiler.version}" + # TODO: For external compilers, version detection would require runtime introspection + # which is not reliably available here. Return a placeholder. return "unknown" -def load_class_from_file( - model_path: str, class_name: str, device: str -) -> Type[torch.nn.Module]: - file_path = f"{model_path}/model.py" - file = Path(file_path).resolve() - module_name = file.stem - +def _read_and_modify_model_code(file_path: str, device: str) -> str: + """Read model file and modify code for target device.""" with open(file_path, "r", encoding="utf-8") as f: model_code = f.read() - model_code = utils.modify_code_by_device(model_code, device) + return utils.modify_code_by_device(model_code, device) + + +def _create_module_from_code( + module_name: str, code: str, file_path: Path +) -> types.ModuleType: + """Create a module by executing code.""" spec = importlib.util.spec_from_loader(module_name, loader=None) module = importlib.util.module_from_spec(spec) sys.modules[module_name] = module - compiled_code = compile(model_code, filename=file, mode="exec") + compiled_code = compile(code, filename=file_path, mode="exec") exec(compiled_code, module.__dict__) + return module - model_class = getattr(module, class_name, None) - setattr(model_class, "__graph_net_file_path__", file_path) - setattr(model_class, "__graph_net_device__", device) - return model_class +def load_class_from_file( + model_path: str, class_name: str, device: str +) -> Type[torch.nn.Module]: + """Dynamically load a model class from file. -def get_compiler_backend(args) -> GraphCompilerBackend: - """ - Dynamically load backend class based on args.compiler + Args: + model_path: Directory containing model.py. + class_name: Name of the class to load. + device: Target device for code modification. + + Returns: + The loaded model class with metadata attributes set. + + Raises: + AttributeError: If class_name not found in module. 
""" - compiler_name = args.compiler.lower() - module_name = f"graph_net_bench.torch.backend.{compiler_name}_backend" + file_path = f"{model_path}/model.py" + resolved_path = Path(file_path).resolve() + module_name = resolved_path.stem - try: - module = __import__(module_name, fromlist=[f"{compiler_name.title()}Backend"]) + model_code = _read_and_modify_model_code(file_path, device) + module = _create_module_from_code(module_name, model_code, resolved_path) - class_name = ( - f"{''.join(part.title() for part in compiler_name.split('_'))}Backend" - ) + model_class = getattr(module, class_name) + model_class.__graph_net_file_path__ = file_path + model_class.__graph_net_device__ = device + return model_class - backend_class = None - if hasattr(module, class_name): - backend_class = getattr(module, class_name) - else: - raise ImportError(f"No valid backend class found in {module_name}") - except ImportError as e: - raise ImportError(f"Failed to import backend module for '{compiler_name}': {e}") +def _build_backend_class_name(compiler_name: str) -> str: + """Convert compiler name to PascalCase backend class name.""" + return "".join(part.title() for part in compiler_name.split("_")) + "Backend" - backend_config = ( - test_compiler_util.convert_to_dict(args.backend_config) - if args.backend_config is not None - else {} - ) - return backend_class(backend_config) +def _load_backend_class(compiler_name: str) -> Type[GraphCompilerBackend]: + """Load backend class by compiler name.""" + module_name = f"graph_net_bench.torch.backend.{compiler_name}_backend" + class_name = _build_backend_class_name(compiler_name) + + module = __import__(module_name, fromlist=[class_name]) + if not hasattr(module, class_name): + raise ImportError( + f"No valid backend class '{class_name}' found in {module_name}" + ) + return getattr(module, class_name) -def get_model(args): - device = "xla" if args.compiler == "xla" else args.device - # device: Torch device object specifying the target device for model loading (e.g., 'cuda', 'cpu', 'xla') - model_class = load_class_from_file( - args.model_path, class_name="GraphModule", device=device - ) - model = model_class().to(torch.device(args.device)) - return model +def get_compiler_backend(args) -> GraphCompilerBackend: + """Dynamically load and instantiate backend class based on args.compiler.""" + backend_class = _load_backend_class(args.compiler.lower()) + backend_config = test_compiler_util.convert_to_dict(args.backend_config) or {} + return backend_class(backend_config) -def get_input_dict(args): - inputs_params = utils.load_converted_from_text(f"{args.model_path}") - params = inputs_params["weight_info"] +def get_model(args) -> torch.nn.Module: + """Load and prepare model for evaluation.""" + load_device = "xla" if args.compiler == "xla" else args.device + model_class = load_class_from_file(args.model_path, "GraphModule", load_device) + return model_class().to(torch.device(args.device)) + + +def _update_tensor_device(params: Dict[str, Any], device: str) -> None: + """Update device info in tensor metadata in-place.""" for tensor_meta in params.values(): if "device" in tensor_meta["info"]: - tensor_meta["info"]["device"] = args.device + tensor_meta["info"]["device"] = device + + +def get_input_dict(args) -> Dict[str, torch.Tensor]: + """Load and prepare input tensors for model evaluation. + + Args: + args: Arguments containing model_path and device settings. + + Returns: + Dictionary mapping parameter names to tensors on target device. 
+ """ + inputs_params = utils.load_converted_from_text(args.model_path) + params = inputs_params["weight_info"] + _update_tensor_device(params, args.device) + + target_device = torch.device(args.device) + return {k: utils.replay_tensor(v).to(target_device) for k, v in params.items()} + + +def _run_warmup(model_call: Callable, warmup_count: int, sync_fn: Callable) -> None: + """Execute warmup runs.""" + for _ in range(warmup_count): + model_call() + sync_fn() + + +def _measure_single_trial_cuda( + model_call: Callable, sync_fn: Callable +) -> Tuple[float, float]: + """Measure a single trial on CUDA device. + + Returns: + Tuple of (e2e_time_ms, gpu_time_ms). + """ + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + duration_box = test_compiler_util.DurationBox(-1) + + with test_compiler_util.naive_timer(duration_box, sync_fn): + start_event.record() + model_call() + end_event.record() + sync_fn() + + gpu_time_ms = start_event.elapsed_time(end_event) + return duration_box.value, gpu_time_ms + + +def _measure_single_trial_cpu(model_call: Callable, sync_fn: Callable) -> float: + """Measure a single trial on CPU or other devices. + + Returns: + End-to-end time in milliseconds. + """ + duration_box = test_compiler_util.DurationBox(-1) + with test_compiler_util.naive_timer(duration_box, sync_fn): + model_call() + return duration_box.value + + +def _run_cuda_trials( + model_call: Callable, trials: int, sync_fn: Callable +) -> Dict[str, Any]: + """Run multiple timing trials on CUDA device.""" + torch.cuda.empty_cache() + e2e_times: List[float] = [] + gpu_times: List[float] = [] + + for i in range(trials): + e2e_time, gpu_time = _measure_single_trial_cuda(model_call, sync_fn) + e2e_times.append(e2e_time) + gpu_times.append(gpu_time) + print( + f"Trial {i + 1}: e2e={e2e_time:.5f} ms, gpu={gpu_time:.5f} ms", + file=sys.stderr, + flush=True, + ) + return { - k: utils.replay_tensor(v).to(torch.device(args.device)) - for k, v in params.items() + "e2e": test_compiler_util.get_timing_stats(e2e_times), + "gpu": test_compiler_util.get_timing_stats(gpu_times), } -def measure_performance(model_call, args, compiler): - stats = {} - outs = model_call() +def _run_cpu_trials( + model_call: Callable, trials: int, sync_fn: Callable +) -> Dict[str, Any]: + """Run multiple timing trials on CPU or other devices.""" + e2e_times: List[float] = [] + + for i in range(trials): + e2e_time = _measure_single_trial_cpu(model_call, sync_fn) + e2e_times.append(e2e_time) + print( + f"Trial {i + 1}: e2e={e2e_time:.5f} ms", + file=sys.stderr, + flush=True, + ) + + return {"e2e": test_compiler_util.get_timing_stats(e2e_times)} - # Warmup runs - for _ in range(args.warmup): - model_call() - compiler.synchronize() + +def measure_performance( + model_call: Callable, args, compiler +) -> Tuple[Any, Dict[str, Any]]: + """Measure model inference performance. + + Args: + model_call: Callable that executes the model. + args: Arguments containing device, warmup, and trials settings. + compiler: Compiler backend with synchronize method. + + Returns: + Tuple of (model_outputs, timing_stats). 
+ """ + outs = model_call() + _run_warmup(model_call, args.warmup, compiler.synchronize) print( f"[Profiling] Warm up {args.warmup}, Trials {args.trials}", @@ -144,58 +290,83 @@ def measure_performance(model_call, args, compiler): flush=True, ) - if "cuda" in args.device: - torch.cuda.empty_cache() - e2e_times = [] - gpu_times = [] - - for i in range(args.trials): - # End-to-end timing (naive_timer) - duration_box = test_compiler_util.DurationBox(-1) - with test_compiler_util.naive_timer(duration_box, compiler.synchronize): - # GPU-only timing (CUDA Events) - start_event = torch.cuda.Event(enable_timing=True) - end_event = torch.cuda.Event(enable_timing=True) - start_event.record() - - model_call() - - end_event.record() - compiler.synchronize() - - gpu_time_ms = start_event.elapsed_time(end_event) - e2e_times.append(duration_box.value) - gpu_times.append(gpu_time_ms) - print( - f"Trial {i + 1}: e2e={duration_box.value:.5f} ms, gpu={gpu_time_ms:.5f} ms", - file=sys.stderr, - flush=True, - ) - - stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times) - stats["gpu"] = test_compiler_util.get_timing_stats(gpu_times) - - else: # CPU or other devices - e2e_times = [] - for i in range(args.trials): - duration_box = test_compiler_util.DurationBox(-1) - with test_compiler_util.naive_timer(duration_box, compiler.synchronize): - model_call() - print( - f"Trial {i + 1}: e2e={duration_box.value:.5f} ms", - file=sys.stderr, - flush=True, - ) - e2e_times.append(duration_box.value) - stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times) + is_cuda = "cuda" in args.device + if is_cuda: + stats = _run_cuda_trials(model_call, args.trials, compiler.synchronize) + else: + stats = _run_cpu_trials(model_call, args.trials, compiler.synchronize) return outs, stats -def eval_single_model_with_single_backend(args): +def _compile_and_benchmark( + args, compiler: GraphCompilerBackend, model: torch.nn.Module, input_dict: Dict +) -> Tuple[bool, Any, Dict[str, Any]]: + """Compile model and run performance benchmark. + + Returns: + Tuple of (success, outputs, time_stats). 
+ """ + try: + compiled_model = compiler(model) + + def model_call(): + return compiled_model(**input_dict) + + outputs, time_stats = measure_performance(model_call, args, compiler) + return True, outputs, time_stats + except Exception as e: + print( + f"Run model failed: {str(e)}\n{traceback.format_exc()}", + file=sys.stderr, + flush=True, + ) + return False, None, {} + + +def _run_evaluation_core(args) -> Tuple[bool, Any, Dict[str, Any]]: + """Core evaluation logic: load model, compile, and benchmark.""" + compiler = get_compiler_backend(args) + input_dict = get_input_dict(args) + model = get_model(args) + model.eval() + + test_compiler_util.print_config( + args, + get_hardware_name(args.device), + get_compiler_version(args.compiler), + ) + + return _compile_and_benchmark(args, compiler, model, input_dict) + + +def _finalize_evaluation( + args, + success: bool, + outputs: Any, + time_stats: Dict[str, Any], + output_dump_path: Path, +) -> None: + """Finalize evaluation: save outputs and print status.""" + test_compiler_util.print_running_status(args, success) + if success: + torch.save(outputs, str(output_dump_path)) + test_compiler_util.print_with_log_prompt( + "[Performance][eager]:", json.dumps(time_stats), args.log_prompt + ) + + +def _print_log_file(log_path: Path) -> None: + """Read and print log file content to stderr.""" + print(Path(log_path).read_text(encoding="utf-8"), file=sys.stderr, flush=True) + + +def eval_single_model_with_single_backend(args) -> None: + """Evaluate a single model with a single compiler backend.""" check_and_complete_args(args) set_seed(args.seed) os.makedirs(args.output_path, exist_ok=True) + log_path = utils.get_log_path(args.output_path, args.model_path) output_dump_path = utils.get_output_path(args.output_path, args.model_path) print(f"Log path: {log_path}", file=sys.stderr, flush=True) @@ -203,66 +374,19 @@ def eval_single_model_with_single_backend(args): with open(log_path, "w", encoding="utf-8") as log_f: with redirect_stdout(log_f), redirect_stderr(log_f): - compiler = get_compiler_backend(args) - - input_dict = get_input_dict(args) - model = get_model(args) - model.eval() - - test_compiler_util.print_config( - args, - get_hardward_name(args.device), - get_compiler_version(args.compiler), - ) - - success = False - time_stats = {} - try: - compiled_model = compiler(model) - - def model_call(): - return compiled_model(**input_dict) - - outputs, time_stats = measure_performance(model_call, args, compiler) - success = True - except Exception as e: - print( - f"Run model failed: {str(e)}\n{traceback.format_exc()}", - file=sys.stderr, - flush=True, - ) - - test_compiler_util.print_running_status(args, success) - if success: - torch.save(outputs, str(output_dump_path)) - test_compiler_util.print_with_log_prompt( - "[Performance][eager]:", json.dumps(time_stats), args.log_prompt - ) - - with open(log_path, "r", encoding="utf-8") as f: - content = f.read() - print(content, file=sys.stderr, flush=True) - - -def check_and_complete_args(args): - """ - Ensure all required arguments are present with default values if missing - """ - defaults = { - "model_path": None, # Model path - "output_path": None, # Log and output directory - "seed": 123, # Random seed - "compiler": "inductor", # Compiler name - "device": "cuda", # Device for testing the compiler (e.g., 'cpu' or 'cuda') - "op_lib": None, # Operator library - "warmup": 3, # Number of warmup steps - "trials": 5, # Number of timing trials - "log_prompt": "graph-net-bench-log", # Log prompt for performance log 
filtering - "model_path_prefix": None, # Prefix path to model path in args.model-path - "backend_config": None, # backend configuration json - } + success, outputs, time_stats = _run_evaluation_core(args) + _finalize_evaluation(args, success, outputs, time_stats, output_dump_path) + + _print_log_file(log_path) - for key, default in defaults.items(): + +def check_and_complete_args(args) -> None: + """Ensure all required arguments are present with default values if missing. + + Args: + args: Namespace object to be validated and completed in-place. + """ + for key, default in _ARG_DEFAULTS.items(): if not hasattr(args, key): setattr(args, key, default) diff --git a/graph_net_bench/torch/runner/base_runner.py b/graph_net_bench/torch/runner/base_runner.py index 25d0882c8..809f5f281 100644 --- a/graph_net_bench/torch/runner/base_runner.py +++ b/graph_net_bench/torch/runner/base_runner.py @@ -129,21 +129,24 @@ def _get_log_path(self, output_dir: str, model_path: str) -> Path: return Path(utils.get_log_path(output_dir, model_path)) -def create_runner(config: RunnerConfig) -> BaseRunner: - """Factory function to create appropriate runner based on config.""" - runner_type = config.strategy.runner_type - +def _get_runner_class(runner_type: RunnerType) -> type: + """Get runner class by type with lazy imports.""" if runner_type == RunnerType.LOCAL: from .local_runner import LocalRunner - return LocalRunner(config) - elif runner_type == RunnerType.PROCESS: + return LocalRunner + if runner_type == RunnerType.PROCESS: from .process_runner import ProcessRunner - return ProcessRunner(config) - elif runner_type == RunnerType.REMOTE: + return ProcessRunner + if runner_type == RunnerType.REMOTE: from .remote_runner import RemoteRunner - return RemoteRunner(config) - else: - raise ValueError(f"Unknown runner_type: {runner_type}") + return RemoteRunner + raise ValueError(f"Unknown runner_type: {runner_type}") + + +def create_runner(config: RunnerConfig) -> BaseRunner: + """Factory function to create appropriate runner based on config.""" + runner_class = _get_runner_class(config.strategy.runner_type) + return runner_class(config) diff --git a/graph_net_bench/torch/runner/local_runner.py b/graph_net_bench/torch/runner/local_runner.py index 3d07a6470..1f21dddc0 100644 --- a/graph_net_bench/torch/runner/local_runner.py +++ b/graph_net_bench/torch/runner/local_runner.py @@ -1,98 +1,143 @@ +"""Local runner for in-process model evaluation.""" + +import json import os import sys -import json -import types import traceback +import types from io import StringIO from contextlib import redirect_stdout, redirect_stderr +from pathlib import Path +from typing import Any import torch -from .base_runner import BaseRunner, RunResult +from .base_runner import BaseRunner, RunResult, RunnerConfig + + +def _write_log_file(log_path: Path, content: str) -> None: + """Write log content to file.""" + with open(log_path, "w", encoding="utf-8") as f: + f.write(content) + + +def _create_eval_args( + model_path: str, output_dir: str, config: RunnerConfig +) -> types.SimpleNamespace: + """Create evaluation arguments from config.""" + return types.SimpleNamespace( + model_path=model_path, + output_path=output_dir, + seed=config.execution.seed, + compiler=config.execution.compiler, + device=config.execution.device, + op_lib=config.execution.op_lib, + warmup=config.execution.warmup, + trials=config.execution.trials, + log_prompt=config.execution.log_prompt, + backend_config=config.execution.backend_config, + ) class LocalRunner(BaseRunner): 
"""Execute model evaluation in the current process.""" def run(self, model_path: str, output_dir: str) -> RunResult: - from graph_net_bench.torch import eval_backend_perf - os.makedirs(output_dir, exist_ok=True) log_path = self._get_log_path(output_dir, model_path) output_path = self._get_output_path(output_dir, model_path) - - eval_args = types.SimpleNamespace( - model_path=model_path, - output_path=output_dir, - seed=self.config.execution.seed, - compiler=self.config.execution.compiler, - device=self.config.execution.device, - op_lib=self.config.execution.op_lib, - warmup=self.config.execution.warmup, - trials=self.config.execution.trials, - log_prompt=self.config.execution.log_prompt, - backend_config=self.config.execution.backend_config, - ) + eval_args = _create_eval_args(model_path, output_dir, self.config) log_buffer = StringIO() - result = RunResult( - output_path=output_path, - log_path=log_path, - ) + result = RunResult(output_path=output_path, log_path=log_path) + + self._execute_with_logging(eval_args, result, log_buffer) + self._finalize_result(result, log_buffer, log_path) + + return result + + def _execute_with_logging( + self, + eval_args: types.SimpleNamespace, + result: RunResult, + log_buffer: StringIO, + ) -> None: + """Execute evaluation with output redirection.""" + from graph_net_bench.torch import eval_backend_perf try: eval_backend_perf.register_op_lib(self.config.execution.op_lib) eval_backend_perf.set_seed(self.config.execution.seed) - with redirect_stdout(log_buffer), redirect_stderr(log_buffer): self._run_evaluation(eval_args, result) - except Exception as e: result.success = False result.error_message = f"{str(e)}\n{traceback.format_exc()}" log_buffer.write(f"\n[ERROR] {result.error_message}\n") + def _finalize_result( + self, result: RunResult, log_buffer: StringIO, log_path: Path + ) -> None: + """Finalize result: save log and print to stderr.""" result.log_content = log_buffer.getvalue() + _write_log_file(log_path, result.log_content) + print(result.log_content, file=sys.stderr, flush=True) - with open(log_path, "w", encoding="utf-8") as f: - f.write(result.log_content) + def _run_evaluation(self, args: types.SimpleNamespace, result: RunResult) -> None: + """Run model evaluation and populate result.""" + from graph_net_bench.torch import eval_backend_perf - print(result.log_content, file=sys.stderr, flush=True) + compiler, model, input_dict = self._prepare_model(args) + self._log_config(args) - return result + compiled_model = compiler(model) + + def model_call(): + return compiled_model(**input_dict) - def _run_evaluation(self, args: types.SimpleNamespace, result: RunResult): + outputs, time_stats = eval_backend_perf.measure_performance( + model_call, args, compiler + ) + + self._populate_result(result, outputs, time_stats) + self._log_completion(args, time_stats) + + def _prepare_model(self, args: types.SimpleNamespace) -> tuple: + """Prepare compiler, model, and inputs.""" from graph_net_bench.torch import eval_backend_perf - from graph_net_bench import test_compiler_util compiler = eval_backend_perf.get_compiler_backend(args) input_dict = eval_backend_perf.get_input_dict(args) model = eval_backend_perf.get_model(args) model.eval() + return compiler, model, input_dict + + def _log_config(self, args: types.SimpleNamespace) -> None: + """Log configuration information.""" + from graph_net_bench.torch import eval_backend_perf + from graph_net_bench import test_compiler_util test_compiler_util.print_config( args, - 
eval_backend_perf.get_hardward_name(args.device), + eval_backend_perf.get_hardware_name(args.device), eval_backend_perf.get_compiler_version(args.compiler), ) - compiled_model = compiler(model) - - def model_call(): - return compiled_model(**input_dict) - - outputs, time_stats = eval_backend_perf.measure_performance( - model_call, args, compiler - ) - + def _populate_result( + self, result: RunResult, outputs: Any, time_stats: dict + ) -> None: + """Populate result with outputs and stats.""" result.success = True result.outputs = outputs result.time_stats = time_stats - if result.output_path: torch.save(outputs, str(result.output_path)) + def _log_completion(self, args: types.SimpleNamespace, time_stats: dict) -> None: + """Log completion status and performance stats.""" + from graph_net_bench import test_compiler_util + test_compiler_util.print_running_status(args, True) test_compiler_util.print_with_log_prompt( "[Performance][eager]:", json.dumps(time_stats), args.log_prompt diff --git a/graph_net_bench/torch/runner/process_runner.py b/graph_net_bench/torch/runner/process_runner.py index 9ac68607e..1a48fec8e 100644 --- a/graph_net_bench/torch/runner/process_runner.py +++ b/graph_net_bench/torch/runner/process_runner.py @@ -1,57 +1,51 @@ +"""Process runner for subprocess-based model evaluation.""" + import os -import sys import subprocess +import sys from pathlib import Path +from typing import Dict import torch from .base_runner import BaseRunner, RunResult +def _get_env_with_pythonpath() -> Dict[str, str]: + """Get environment with PYTHONPATH set to repo root.""" + env = os.environ.copy() + repo_root = Path(__file__).resolve().parents[3] + env["PYTHONPATH"] = f"{repo_root}:{env.get('PYTHONPATH', '')}" + return env + + class ProcessRunner(BaseRunner): """Execute model evaluation in a separate subprocess on the local machine.""" def run(self, model_path: str, output_dir: str) -> RunResult: os.makedirs(output_dir, exist_ok=True) - log_path = self._get_log_path(output_dir, model_path) - output_path = self._get_output_path(output_dir, model_path) - result = RunResult( - output_path=output_path, - log_path=log_path, + output_path=self._get_output_path(output_dir, model_path), + log_path=self._get_log_path(output_dir, model_path), ) cmd = self._build_command(model_path, output_dir) print(f"[ProcessRunner] Executing: {cmd}", file=sys.stderr, flush=True) - try: - env = os.environ.copy() - repo_root = Path(__file__).resolve().parents[3] - env["PYTHONPATH"] = f"{repo_root}:{env.get('PYTHONPATH', '')}" - - proc = subprocess.run( - cmd, - shell=True, - env=env, - capture_output=True, - text=True, - timeout=self.config.strategy.subprocess_timeout, - ) - - result.log_content = proc.stderr or "" + self._execute_subprocess(cmd, result, output_dir, model_path) + print(result.log_content, file=sys.stderr, flush=True) - if proc.returncode != 0: - result.success = False - result.error_message = ( - f"Process exited with code {proc.returncode}\n" - f"stdout: {proc.stdout}\n" - f"stderr: {proc.stderr}" - ) - else: - result.success = True - self._parse_result(result, output_dir, model_path) + return result + def _execute_subprocess( + self, cmd: str, result: RunResult, output_dir: str, model_path: str + ) -> None: + """Execute subprocess and handle results.""" + try: + proc = self._run_process(cmd) + result.log_content = proc.stderr or "" + self._handle_process_result(proc, result, output_dir, model_path) except subprocess.TimeoutExpired as e: result.success = False result.error_message = f"Process timed out: 
{e}" @@ -59,10 +53,41 @@ def run(self, model_path: str, output_dir: str) -> RunResult: result.success = False result.error_message = f"Process execution failed: {e}" - print(result.log_content, file=sys.stderr, flush=True) - return result + def _run_process(self, cmd: str) -> subprocess.CompletedProcess: + """Run subprocess with configured timeout.""" + return subprocess.run( + cmd, + shell=True, + env=_get_env_with_pythonpath(), + capture_output=True, + text=True, + timeout=self.config.strategy.subprocess_timeout, + ) + + def _handle_process_result( + self, + proc: subprocess.CompletedProcess, + result: RunResult, + output_dir: str, + model_path: str, + ) -> None: + """Handle subprocess completion result.""" + if proc.returncode != 0: + result.success = False + result.error_message = ( + f"Process exited with code {proc.returncode}\n" + f"stdout: {proc.stdout}\n" + f"stderr: {proc.stderr}" + ) + return + result.success = True + self._parse_result(result, output_dir, model_path) def _build_command(self, model_path: str, output_dir: str) -> str: + """Build subprocess command string.""" + from graph_net_bench import test_compiler_util + + config_str = test_compiler_util.convert_to_base64(self.config.to_dict()) cmd_parts = [ sys.executable, "-m", @@ -71,32 +96,36 @@ def _build_command(self, model_path: str, output_dir: str) -> str: model_path, "--output-path", output_dir, + "--config", + config_str, ] - - config_dict = self.config.to_dict() - from graph_net_bench import test_compiler_util - - config_str = test_compiler_util.convert_to_base64(config_dict) - cmd_parts.extend(["--config", config_str]) - return " ".join(cmd_parts) - def _parse_result(self, result: RunResult, output_dir: str, model_path: str): + def _parse_result( + self, result: RunResult, output_dir: str, model_path: str + ) -> None: + """Parse outputs and logs from subprocess result.""" + self._load_outputs(result) + self._parse_log(result) + + def _load_outputs(self, result: RunResult) -> None: + """Load model outputs from file.""" + if not result.output_path or not result.output_path.exists(): + return + try: + result.outputs = torch.load(str(result.output_path)) + except Exception as e: + result.error_message += f"\nFailed to load outputs: {e}" + + def _parse_log(self, result: RunResult) -> None: + """Parse log file for content and timing stats.""" + if not result.log_path or not result.log_path.exists(): + return from graph_net_bench import test_compiler_util - if result.output_path and result.output_path.exists(): - try: - result.outputs = torch.load(str(result.output_path)) - except Exception as e: - result.error_message += f"\nFailed to load outputs: {e}" - - if result.log_path and result.log_path.exists(): - try: - result.log_content = test_compiler_util.extract_log_content( - str(result.log_path) - ) - result.time_stats = test_compiler_util.parse_performance_stats( - str(result.log_path) - ) - except Exception as e: - result.error_message += f"\nFailed to parse log: {e}" + try: + log_path_str = str(result.log_path) + result.log_content = test_compiler_util.extract_log_content(log_path_str) + result.time_stats = test_compiler_util.parse_performance_stats(log_path_str) + except Exception as e: + result.error_message += f"\nFailed to parse log: {e}" diff --git a/graph_net_bench/torch/runner/remote_runner.py b/graph_net_bench/torch/runner/remote_runner.py index 74c5e651f..c7c371278 100644 --- a/graph_net_bench/torch/runner/remote_runner.py +++ b/graph_net_bench/torch/runner/remote_runner.py @@ -1,135 +1,181 @@ +"""Remote 
diff --git a/graph_net_bench/torch/runner/remote_runner.py b/graph_net_bench/torch/runner/remote_runner.py
index 74c5e651f..c7c371278 100644
--- a/graph_net_bench/torch/runner/remote_runner.py
+++ b/graph_net_bench/torch/runner/remote_runner.py
@@ -1,135 +1,181 @@
+"""Remote runner for gRPC-based model evaluation."""
+
 import os
 import sys
-from typing import Dict
+import traceback
+from pathlib import Path
+from typing import Dict, Optional

 import torch

 from .base_runner import BaseRunner, RunResult


+def _find_file_by_extension(
+    files_dict: Dict[str, bytes], expected_name: Optional[str], extension: str
+) -> Optional[str]:
+    """Find file in dict by expected name or by extension if only one exists."""
+    if expected_name and expected_name in files_dict:
+        return expected_name
+    available = sorted(k for k in files_dict.keys() if k.endswith(extension))
+    if len(available) == 1:
+        return available[0]
+    return None
+
+
+def _save_bytes_to_file(path: Path, content: bytes) -> None:
+    """Save bytes content to file."""
+    with open(path, "wb") as f:
+        f.write(content)
+
+
 class RemoteRunner(BaseRunner):
     """Execute model evaluation on a remote machine via gRPC."""

     def run(self, model_path: str, output_dir: str) -> RunResult:
-        from graph_net_rpc.sample_remote_executor import SampleRemoteExecutor
-
         os.makedirs(output_dir, exist_ok=True)
-        log_path = self._get_log_path(output_dir, model_path)
-        output_path = self._get_output_path(output_dir, model_path)
-
         result = RunResult(
-            output_path=output_path,
-            log_path=log_path,
+            output_path=self._get_output_path(output_dir, model_path),
+            log_path=self._get_log_path(output_dir, model_path),
         )
-        rpc_cmd = self._build_rpc_command()
+        self._execute_remote(model_path, result)
+        return result
+
+    def _execute_remote(self, model_path: str, result: RunResult) -> None:
+        """Execute model on remote machine."""
+        from graph_net_rpc.sample_remote_executor import SampleRemoteExecutor
+
         executor = SampleRemoteExecutor(
             machine=self.config.strategy.remote_machine,
             port=self.config.strategy.remote_port,
         )

         try:
-            print(
-                f"[RemoteRunner] Sending to {self.config.strategy.remote_machine}:{self.config.strategy.remote_port}",
-                file=sys.stderr,
-                flush=True,
-            )
+            self._log_execution_start()
+            rpc_cmd = self._build_rpc_command()
             print(f"[RemoteRunner] rpc_cmd: {rpc_cmd}", file=sys.stderr, flush=True)

             files_dict = executor.execute(model_path, rpc_cmd)
-            self._process_remote_output(result, files_dict, output_dir, model_path)
+            self._process_remote_output(result, files_dict)
             result.success = True
-
         except Exception as e:
-            import traceback
-
             result.success = False
             result.error_message = (
                 f"Remote execution failed: {e}\n{traceback.format_exc()}"
             )
             print(result.error_message, file=sys.stderr, flush=True)
-
         finally:
             executor.close()

-        return result
+    def _log_execution_start(self) -> None:
+        """Log remote execution start."""
+        machine = self.config.strategy.remote_machine
+        port = self.config.strategy.remote_port
+        print(
+            f"[RemoteRunner] Sending to {machine}:{port}", file=sys.stderr, flush=True
+        )

     def _build_rpc_command(self) -> str:
-        cmd = "python3 -m graph_net.torch.test_reference_device"
-        cmd += ' --model-path "$INPUT_WORKSPACE"'
-        cmd += ' --reference-dir "$OUTPUT_WORKSPACE"'
-        cmd += f" --compiler {self.config.execution.compiler}"
-        cmd += f" --device {self.config.execution.device}"
-        cmd += f" --op-lib {self.config.execution.op_lib}"
-        cmd += f" --warmup {self.config.execution.warmup}"
-        cmd += f" --trials {self.config.execution.trials}"
-        cmd += f" --seed {self.config.execution.seed}"
-
-        if self.config.execution.log_prompt:
-            cmd += f" --log-prompt {self.config.execution.log_prompt}"
-        if self.config.execution.backend_config:
-            cmd += f" --config {self.config.execution.backend_config}"
-
-        return cmd
+        """Build remote execution command string."""
+        exec_cfg = self.config.execution
+        cmd_parts = [
+            "python3 -m graph_net.torch.test_reference_device",
+            '--model-path "$INPUT_WORKSPACE"',
+            '--reference-dir "$OUTPUT_WORKSPACE"',
+            f"--compiler {exec_cfg.compiler}",
+            f"--device {exec_cfg.device}",
+            f"--op-lib {exec_cfg.op_lib}",
+            f"--warmup {exec_cfg.warmup}",
+            f"--trials {exec_cfg.trials}",
+            f"--seed {exec_cfg.seed}",
+        ]
+        if exec_cfg.log_prompt:
+            cmd_parts.append(f"--log-prompt {exec_cfg.log_prompt}")
+        if exec_cfg.backend_config:
+            cmd_parts.append(f"--config {exec_cfg.backend_config}")
+        return " ".join(cmd_parts)
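For a concrete feel of what `_build_rpc_command` emits, a sketch with made-up execution settings (the field values below are illustrative only; the real values come from `self.config.execution`):

from types import SimpleNamespace

# Hypothetical stand-in for self.config.execution; field names mirror the patch.
exec_cfg = SimpleNamespace(
    compiler="inductor",
    device="cuda:0",
    op_lib="default",
    warmup=5,
    trials=10,
    seed=1024,
)
cmd_parts = [
    "python3 -m graph_net.torch.test_reference_device",
    '--model-path "$INPUT_WORKSPACE"',
    '--reference-dir "$OUTPUT_WORKSPACE"',
    f"--compiler {exec_cfg.compiler}",
    f"--device {exec_cfg.device}",
    f"--op-lib {exec_cfg.op_lib}",
    f"--warmup {exec_cfg.warmup}",
    f"--trials {exec_cfg.trials}",
    f"--seed {exec_cfg.seed}",
]
print(" ".join(cmd_parts))
# -> python3 -m graph_net.torch.test_reference_device --model-path "$INPUT_WORKSPACE" ...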
string.""" + exec_cfg = self.config.execution + cmd_parts = [ + "python3 -m graph_net.torch.test_reference_device", + '--model-path "$INPUT_WORKSPACE"', + '--reference-dir "$OUTPUT_WORKSPACE"', + f"--compiler {exec_cfg.compiler}", + f"--device {exec_cfg.device}", + f"--op-lib {exec_cfg.op_lib}", + f"--warmup {exec_cfg.warmup}", + f"--trials {exec_cfg.trials}", + f"--seed {exec_cfg.seed}", + ] + if exec_cfg.log_prompt: + cmd_parts.append(f"--log-prompt {exec_cfg.log_prompt}") + if exec_cfg.backend_config: + cmd_parts.append(f"--config {exec_cfg.backend_config}") + return " ".join(cmd_parts) def _process_remote_output( - self, - result: RunResult, - files_dict: Dict[str, bytes], - output_dir: str, - model_path: str, - ): - from graph_net_bench import test_compiler_util - - log_filename = result.log_path.name if result.log_path else None - pth_filename = result.output_path.name if result.output_path else None - - available_logs = sorted([k for k in files_dict.keys() if k.endswith(".log")]) - available_pths = sorted([k for k in files_dict.keys() if k.endswith(".pth")]) - - if log_filename not in files_dict and len(available_logs) == 1: - log_filename = available_logs[0] - if pth_filename not in files_dict and len(available_pths) == 1: - pth_filename = available_pths[0] - - if log_filename and log_filename in files_dict: - log_bytes = files_dict[log_filename] - if result.log_path: - with open(result.log_path, "wb") as f: - f.write(log_bytes) - try: - result.log_content = log_bytes.decode("utf-8") - print(result.log_content, file=sys.stderr, flush=True) - except Exception: - result.log_content = f"[Binary log, {len(log_bytes)} bytes]" - # Write binary content as text for parsing - with open(result.log_path, "wb") as f: - f.write(log_bytes) - - try: - result.time_stats = test_compiler_util.parse_performance_stats( - str(result.log_path) - ) - except Exception as e: - print(f"Warning: Failed to parse time stats: {e}", file=sys.stderr) - else: + self, result: RunResult, files_dict: Dict[str, bytes] + ) -> None: + """Process files received from remote execution.""" + self._process_log_file(result, files_dict) + self._process_output_file(result, files_dict) + + def _process_log_file( + self, result: RunResult, files_dict: Dict[str, bytes] + ) -> None: + """Process log file from remote output.""" + expected_name = result.log_path.name if result.log_path else None + log_filename = _find_file_by_extension(files_dict, expected_name, ".log") + + if not log_filename: + available = [k for k in files_dict.keys() if k.endswith(".log")] print( - f"Warning: log not found. expected={log_filename}, available={available_logs}", + f"Warning: log not found. 
+
+    def _save_and_parse_log(self, result: RunResult, log_bytes: bytes) -> None:
+        """Save log file and parse timing stats."""
+
+        if result.log_path:
+            _save_bytes_to_file(result.log_path, log_bytes)
+
+        result.log_content = self._decode_log_content(log_bytes)
+        print(result.log_content, file=sys.stderr, flush=True)
+
+        self._parse_time_stats(result)
+
+    def _decode_log_content(self, log_bytes: bytes) -> str:
+        """Decode log bytes to string."""
+        try:
+            return log_bytes.decode("utf-8")
+        except Exception:
+            return f"[Binary log, {len(log_bytes)} bytes]"
+
+    def _parse_time_stats(self, result: RunResult) -> None:
+        """Parse performance stats from log file."""
+        if not result.log_path:
+            return
+        from graph_net_bench import test_compiler_util

-        if pth_filename and pth_filename in files_dict:
-            pth_bytes = files_dict[pth_filename]
-            if result.output_path:
-                with open(result.output_path, "wb") as f:
-                    f.write(pth_bytes)
-            try:
-                result.outputs = torch.load(str(result.output_path))
-            except Exception as e:
-                print(f"Warning: Failed to load outputs: {e}", file=sys.stderr)
-        else:
+        try:
+            result.time_stats = test_compiler_util.parse_performance_stats(
+                str(result.log_path)
+            )
+        except Exception as e:
+            print(f"Warning: Failed to parse time stats: {e}", file=sys.stderr)
+
+    def _process_output_file(
+        self, result: RunResult, files_dict: Dict[str, bytes]
+    ) -> None:
+        """Process output .pth file from remote output."""
+        expected_name = result.output_path.name if result.output_path else None
+        pth_filename = _find_file_by_extension(files_dict, expected_name, ".pth")
+
+        if not pth_filename:
+            available = [k for k in files_dict.keys() if k.endswith(".pth")]
             print(
-                f"Warning: output not found. expected={pth_filename}, available={available_pths}",
+                f"Warning: output not found. expected={expected_name}, available={available}",
                 file=sys.stderr,
             )
+            return
+
+        pth_bytes = files_dict[pth_filename]
+        self._save_and_load_outputs(result, pth_bytes)
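A quick illustration of the filename-matching fallback used by `_process_log_file` and `_process_output_file` above (file names are made up; the function body mirrors `_find_file_by_extension`):

from typing import Dict, Optional


def find_file_by_extension(
    files_dict: Dict[str, bytes], expected_name: Optional[str], extension: str
) -> Optional[str]:
    # Prefer the exact expected name; otherwise fall back to the unique file
    # with the right extension, if there is exactly one candidate.
    if expected_name and expected_name in files_dict:
        return expected_name
    available = sorted(k for k in files_dict if k.endswith(extension))
    return available[0] if len(available) == 1 else None


files = {"run_01.log": b"...", "model.pth": b"..."}
assert find_file_by_extension(files, "missing.log", ".log") == "run_01.log"
assert find_file_by_extension(files, None, ".pth") == "model.pth"
assert find_file_by_extension({}, None, ".log") is None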
+
+    def _save_and_load_outputs(self, result: RunResult, pth_bytes: bytes) -> None:
+        """Save output file and load tensors."""
+        if result.output_path:
+            _save_bytes_to_file(result.output_path, pth_bytes)
+        try:
+            result.outputs = torch.load(str(result.output_path))
+        except Exception as e:
+            print(f"Warning: Failed to load outputs: {e}", file=sys.stderr)
diff --git a/graph_net_bench/torch/test_compiler.py b/graph_net_bench/torch/test_compiler.py
index 8ee670fd2..52e027c65 100755
--- a/graph_net_bench/torch/test_compiler.py
+++ b/graph_net_bench/torch/test_compiler.py
@@ -58,7 +58,7 @@ def set_seed(random_seed):
         torch.cuda.manual_seed_all(random_seed)


-def get_hardward_name(args):
+def get_hardware_name(args):
     hardware_name = "unknown"
     if "cuda" in args.device:
         hardware_name = torch.cuda.get_device_name(args.device)
@@ -146,7 +146,7 @@ def measure_performance(model_call, args, compiler):
         model_call()
         compiler.synchronize()

-    hardware_name = get_hardward_name(args)
+    hardware_name = get_hardware_name(args)
     print(
         f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {args.trials}",
         file=sys.stderr,
@@ -214,7 +214,7 @@ def test_single_model(args):
         "[Processing]", model_path, args.log_prompt
     )
     test_compiler_util.print_basic_config(
-        args, get_hardward_name(args), get_compile_framework_version(args)
+        args, get_hardware_name(args), get_compile_framework_version(args)
     )

     runtime_seed = 1024
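The rename above only fixes the spelling (get_hardward_name -> get_hardware_name); the lookup logic is unchanged. A minimal standalone version of the same idea, taking the device string directly instead of `args` (a simplification assumed here, not part of the patch):

import platform

import torch


def get_hardware_name(device: str) -> str:
    # CUDA devices report the GPU name; CPU falls back to the platform string.
    if "cuda" in device and torch.cuda.is_available():
        return torch.cuda.get_device_name(device)
    if device == "cpu":
        return platform.processor()
    return "unknown"


if __name__ == "__main__":
    print(get_hardware_name("cuda:0" if torch.cuda.is_available() else "cpu"))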