From 9fe074395fc75018daa6a2cf3e4d1c15a77ec498 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Tue, 19 May 2026 10:36:34 +0100 Subject: [PATCH] [KB] Add smoke test to check coverage Compilation / execution coverage in yaml by means of `warning` entries. No performance checks so far (still only the first two fast). --- examples/end-to-end/KernelBench/level1.yaml | 43 ++++++------------- examples/end-to-end/KernelBench/level2.yaml | 12 ++++++ .../KernelBench/test_kernel_bench.py | 27 +++++++++--- 3 files changed, 48 insertions(+), 34 deletions(-) diff --git a/examples/end-to-end/KernelBench/level1.yaml b/examples/end-to-end/KernelBench/level1.yaml index 4a9d2e6c..4fc8c470 100644 --- a/examples/end-to-end/KernelBench/level1.yaml +++ b/examples/end-to-end/KernelBench/level1.yaml @@ -64,9 +64,7 @@ initializations: [rnd, rnd] output_shape: 1024x1024 dtypes: [f32, bf16] - # gflops: (1024 * 32 * 1024 * 2) / 1e9 - # pipeline: matmul - warning: "Optimized pipeline error: too many tiles provided, expected at most 3 found 4" + gflops: (1024 * 32 * 1024 * 2) / 1e9 - kernel: level1/10_3D_tensor_matrix_multiplication.py input_shapes: [16x1024x2048, 2048x768] @@ -88,8 +86,7 @@ output_shape: 4096x4096 dtypes: [f32, bf16] warning: '''ERROR: torch_mlir.compiler_utils.TorchMlirCompilerError: - Lowering TorchFX IR -> Torch Backend IR failed with the following diagnostics: - python exception: Failure while executing pass pipeline''' + error: failed to legalize operation torch.operator that was explicitly marked illegal''' - kernel: level1/13_Matmul_for_symmetric_matrices.py input_shapes: [4096x4096, 4096x4096] @@ -107,7 +104,6 @@ gflops: (4096 * 4096 * 4096 * 2) / 1e9 pipeline: matmul - - kernel: level1/15_Matmul_for_lower_triangular_matrices.py input_shapes: [4096x4096, 4096x4096] initializations: [rnd, rnd] @@ -116,7 +112,6 @@ gflops: (4096 * 4096 * 4096 * 2) / 1e9 pipeline: matmul - - kernel: level1/16_Matmul_with_transposed_A.py input_shapes: [8192x2048, 8192x4096] initializations: [rnd, rnd] @@ -148,7 +143,6 @@ dtypes: [f32, bf16] gflops: (4096 * 393216) / 1e9 - - kernel: level1/20_LeakyReLU.py input_shapes: [4096x393216] initializations: [rnd] @@ -156,7 +150,6 @@ dtypes: [f32, bf16] gflops: (4096 * 393216) / 1e9 - - kernel: level1/21_Sigmoid.py input_shapes: [4096x393216] initializations: [rnd] @@ -164,7 +157,6 @@ dtypes: [f32, bf16] gflops: (4096 * 393216) / 1e9 - - kernel: level1/22_Tanh.py input_shapes: [4096x393216] initializations: [rnd] @@ -172,7 +164,6 @@ dtypes: [f32, bf16] gflops: (4096 * 393216) / 1e9 - - kernel: level1/23_Softmax.py input_shapes: [4096x393216] initializations: [rnd] @@ -180,7 +171,6 @@ dtypes: [f32, bf16] gflops: (4096 * 393216) / 1e9 - - kernel: level1/24_LogSoftmax.py input_shapes: [4096x393216] initializations: [rnd] @@ -188,7 +178,6 @@ dtypes: [f32, bf16] gflops: (4096 * 393216) / 1e9 - - kernel: level1/25_Swish.py input_shapes: [4096x393216] initializations: [rnd] @@ -196,14 +185,14 @@ dtypes: [f32, bf16] gflops: (4096 * 393216) / 1e9 - - kernel: level1/26_GELU_.py input_shapes: [4096x393216] initializations: [rnd] output_shape: 4096x393216 dtypes: [f32, bf16] gflops: (4096 * 393216) / 1e9 - + warning: '''error: cannot be converted to LLVM IR: + missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf''' - kernel: level1/27_SELU_.py input_shapes: [4096x393216] @@ -212,7 +201,6 @@ dtypes: [f32, bf16] gflops: (4096 * 393216) / 1e9 - - kernel: level1/28_HardSigmoid.py input_shapes: [4096x393216] initializations: [rnd] @@ -220,7 +208,6 @@ dtypes: [f32, bf16] gflops: (4096 * 393216) / 1e9 - - kernel: level1/29_Softplus.py input_shapes: [4096x393216] initializations: [rnd] @@ -228,7 +215,6 @@ dtypes: [f32, bf16] gflops: (4096 * 393216) / 1e9 - - kernel: level1/30_Softsign.py input_shapes: [4096x393216] initializations: [rnd] @@ -236,7 +222,6 @@ dtypes: [f32, bf16] gflops: (4096 * 393216) / 1e9 - - kernel: level1/31_ELU.py input_shapes: [4096x393216] initializations: [rnd] @@ -244,7 +229,6 @@ dtypes: [f32, bf16] gflops: (4096 * 393216) / 1e9 - - kernel: level1/32_HardTanh.py input_shapes: [4096x393216] initializations: [rnd] @@ -252,7 +236,6 @@ dtypes: [f32, bf16] gflops: (4096 * 393216) / 1e9 - - kernel: level1/33_BatchNorm.py input_shapes: [64x64x512x512] initializations: [rnd] @@ -260,7 +243,6 @@ dtypes: [f32, bf16] gflops: (64 * 64 * 512 * 512) / 1e9 - - kernel: level1/34_InstanceNorm.py input_shapes: [112x64x512x512] initializations: [rnd] @@ -268,7 +250,6 @@ dtypes: [f32, bf16] gflops: (112 * 64 * 512 * 512) / 1e9 - - kernel: level1/35_GroupNorm_.py input_shapes: [112x64x512x512] initializations: [rnd] @@ -276,7 +257,6 @@ dtypes: [f32, bf16] gflops: (112 * 64 * 512 * 512) / 1e9 - - kernel: level1/36_RMSNorm_.py input_shapes: [112x64x512x512] initializations: [rnd] @@ -284,7 +264,6 @@ dtypes: [f32, bf16] gflops: (112 * 64 * 512 * 512) / 1e9 - - kernel: level1/37_FrobeniusNorm_.py input_shapes: [112x64x512x512] initializations: [rnd] @@ -292,7 +271,6 @@ dtypes: [f32, bf16] gflops: (112 * 64 * 512 * 512) / 1e9 - - kernel: level1/38_L1Norm_.py input_shapes: [32768x65535] initializations: [rnd] @@ -300,7 +278,6 @@ dtypes: [f32, bf16] gflops: (32768 * 65535) / 1e9 - - kernel: level1/39_L2Norm_.py input_shapes: [32768x65535] initializations: [rnd] @@ -308,7 +285,6 @@ dtypes: [f32, bf16] gflops: (32768 * 65535) / 1e9 - - kernel: level1/40_LayerNorm.py input_shapes: [16x64x256x256] initializations: [rnd] @@ -316,7 +292,6 @@ dtypes: [f32, bf16] gflops: (16 * 64 * 256 * 256) / 1e9 - - kernel: level1/41_Max_Pooling_1D.py input_shapes: [64x192x65536] initializations: [rnd] @@ -624,6 +599,7 @@ output_shape: 32768x32768 dtypes: [f32, bf16] gflops: (32768 * 32768) / 1e9 + warning: "error: Dialect `tm_tensor' not found for custom op 'tm_tensor.scan'" - kernel: level1/90_cumprod.py input_shapes: [32768x32768] @@ -631,6 +607,7 @@ output_shape: 32768x32768 dtypes: [f32, bf16] gflops: (32768 * 32768) / 1e9 + warning: "error: Dialect `tm_tensor' not found for custom op 'tm_tensor.scan'" - kernel: level1/91_cumsum_reverse.py input_shapes: [32768x32768] @@ -638,6 +615,7 @@ output_shape: 32768x32768 dtypes: [f32, bf16] gflops: (32768 * 32768) / 1e9 + warning: "error: Dialect `tm_tensor' not found for custom op 'tm_tensor.scan'" - kernel: level1/92_cumsum_exclusive.py input_shapes: [32768x32768] @@ -645,6 +623,7 @@ output_shape: 32767x32769 dtypes: [f32, bf16] gflops: (32768 * 32768) / 1e9 + warning: "error: Dialect `tm_tensor' not found for custom op 'tm_tensor.scan'" - kernel: level1/93_masked_cumsum.py input_shapes: [32768x32768, 32768x32768] @@ -652,6 +631,7 @@ output_shape: 32768x32768 dtypes: [f32, bf16] gflops: (32768 * 32768) / 1e9 + warning: "error: Dialect `tm_tensor' not found for custom op 'tm_tensor.scan'" - kernel: level1/94_MSELoss.py input_shapes: [32768x32768, 32768x32768] @@ -666,6 +646,7 @@ output_shape: "1" dtypes: [f32, bf16] gflops: (32768 * 4096) / 1e9 + warning: "RuntimeError: gather(): Expected dtype int32/int64 for index, but got torch.float32" - kernel: level1/96_HuberLoss.py input_shapes: [32768x32768, 32768x32768] @@ -673,6 +654,8 @@ output_shape: "1" dtypes: [f32, bf16] gflops: (32768 * 32768) / 1e9 + warning: '''ERROR: torch_mlir.compiler_utils.TorchMlirCompilerError: + error: failed to legalize operation torch.operator that was explicitly marked illegal''' - kernel: level1/97_ScaledDotProductAttention.py input_shapes: [32x32x512x1024, 32x32x512x1024, 32x32x512x1024] @@ -694,6 +677,8 @@ output_shape: "1" dtypes: [f32, bf16] gflops: (32768 * 8192) / 1e9 + warning: '''ERROR: torch_mlir.compiler_utils.TorchMlirCompilerError: + error: failed to legalize operation torch.operator that was explicitly marked illegal''' - kernel: level1/100_HingeLoss.py input_shapes: [32768x32768, 32768] diff --git a/examples/end-to-end/KernelBench/level2.yaml b/examples/end-to-end/KernelBench/level2.yaml index 1a455195..d48e137f 100644 --- a/examples/end-to-end/KernelBench/level2.yaml +++ b/examples/end-to-end/KernelBench/level2.yaml @@ -15,6 +15,7 @@ initializations: [rnd] output_shape: 32x64x16x32x32 dtypes: [f32, bf16] + warning: "missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf" - kernel: level2/4_Conv2d_Mish_Mish.py input_shapes: [64x64x256x256] @@ -39,6 +40,7 @@ initializations: [rnd] output_shape: 64x32x30x62x62 dtypes: [f32, bf16] + warning: "missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf" - kernel: level2/8_Conv3d_Divide_Max_GlobalAvgPool_BiasAdd_Sum.py input_shapes: [128x8x16x64x64] @@ -112,6 +114,7 @@ initializations: [rnd] output_shape: 128x64x258x258 dtypes: [f32, bf16] + warning: "missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf" - kernel: level2/20_ConvTranspose3d_Sum_ResidualAdd_Multiply_ResidualAdd.py input_shapes: [16x32x16x32x32] @@ -203,6 +206,7 @@ initializations: [rnd] output_shape: 32x64x32x64x64 dtypes: [f32, bf16] + warning: "missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf" - kernel: level2/35_Conv2d_Subtract_HardSwish_MaxPool_Mish.py input_shapes: [128x64x128x128] @@ -215,6 +219,7 @@ initializations: [rnd] output_shape: 16x1x1x256 dtypes: [f32, bf16] + warning: "missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf" - kernel: level2/37_Matmul_Swish_Sum_GroupNorm.py input_shapes: [32768x1024] @@ -245,18 +250,21 @@ initializations: [rnd] output_shape: 16384x4096 dtypes: [f32, bf16] + warning: "missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf" - kernel: level2/42_ConvTranspose2d_GlobalAvgPool_BiasAdd_LogSumExp_Sum_Multiply.py input_shapes: [16x64x512x512] initializations: [rnd] output_shape: 16x1 dtypes: [f32, bf16] + warning: "error: failed to legalize operation torch.constant.bool" - kernel: level2/43_Conv3d_Max_LogSumExp_ReLU.py input_shapes: [4x32x32x128x128] initializations: [rnd] output_shape: 4x1x16x64x64 dtypes: [f32, bf16] + warning: "error: failed to legalize operation torch.constant.bool" - kernel: level2/44_ConvTranspose2d_Multiply_GlobalAvgPool_GlobalAvgPool_Mean.py input_shapes: [16x64x128x128] @@ -313,6 +321,8 @@ initializations: [rnd] output_shape: 64x128x126x126 dtypes: [f32, bf16] + warning: '''ERROR: torch_mlir.compiler_utils.TorchMlirCompilerError: + error: failed to legalize operation torch.operator that was explicitly marked illegal''' - kernel: level2/53_Gemm_Scaling_Hardtanh_GELU.py input_shapes: [2048x8192] @@ -325,6 +335,7 @@ initializations: [rnd] output_shape: 64x64x254x254 dtypes: [f32, bf16] + warning: "missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf" - kernel: level2/55_Matmul_MaxPool_Sum_Scale.py input_shapes: [128x32768] @@ -351,6 +362,7 @@ initializations: [rnd] output_shape: 128x1x31x63x63 dtypes: [f32, bf16] + warning: "'ERROR: torch_mlir.compiler_utils.TorchMlirCompilerError: failed to legalize operation torch.constant.bool'" - kernel: level2/59_Matmul_Swish_Scaling.py input_shapes: [128x32768] diff --git a/examples/end-to-end/KernelBench/test_kernel_bench.py b/examples/end-to-end/KernelBench/test_kernel_bench.py index e9aadf0b..c12e8483 100755 --- a/examples/end-to-end/KernelBench/test_kernel_bench.py +++ b/examples/end-to-end/KernelBench/test_kernel_bench.py @@ -58,6 +58,12 @@ def get_tests(args: argparse.Namespace) -> list[dict]: # If a specific test is specified, only include that test if args.test and not test["kernel"].startswith(args.test): continue + # CI mode runs fewer tests for faster feedback + if args.ci and len(test_list) >= 5: + break + # Smoke tests run on the simplest lowering + if args.smoke_test: + test["pipeline"] = str(kb_default_pipeline) for dtype in test["dtypes"]: if not args.bf16 and dtype == "bf16": continue @@ -78,9 +84,6 @@ def get_tests(args: argparse.Namespace) -> list[dict]: "warning": test.get("warning", None), } ) - # CI mode runs fewer tests for faster feedback - if args.ci and len(test_list) >= 5: - return test_list return test_list @@ -110,7 +113,7 @@ def get_flops_per_second(stdout: str, gflops: float) -> float: Parser.add_argument( "--ci", action=argparse.BooleanOptionalAction, - help="Enable CI mode (faster run, fewer kernels).", + help="Enable CI mode (faster run, fewer kernels). Incompatible with --smoke-test.", ) Parser.add_argument( "--test", @@ -122,7 +125,17 @@ def get_flops_per_second(stdout: str, gflops: float) -> float: action=argparse.BooleanOptionalAction, help="Whether to print the MLIR module after all stages. Default is False.", ) + Parser.add_argument( + "--smoke-test", + action=argparse.BooleanOptionalAction, + help="Runs every kernel with loops lowering to pipe clean.", + ) args = Parser.parse_args() + if args.smoke_test and args.ci: + print("\nERROR: Smoke test and CI mode are incompatible.\n") + Parser.print_help() + exit(1) + tests = get_tests(args) if len(tests) == 0: if args.test: @@ -181,7 +194,11 @@ def get_flops_per_second(stdout: str, gflops: float) -> float: print(result.stderr) print(f"Return code: {result.returncode}") - assert result.returncode == 0, "Execution failed" + + # Only stop on failure on normal runs. + # Smoke tests try to run as much as possible. + if not args.smoke_test: + assert result.returncode == 0, "Execution failed" # CHECK: 1_Square_matrix_multiplication_.mlir # CHECK: 0.3745{{.*}} 0.9507{{.*}} 0.7319{{.*}} ... 0.2973{{.*}} 0.9243{{.*}} 0.9710{{.*}}