llvm · rengolin · May 19, 2026 · May 19, 2026
diff --git a/examples/end-to-end/KernelBench/level1.yaml b/examples/end-to-end/KernelBench/level1.yaml
@@ -64,9 +64,7 @@
   initializations: [rnd, rnd]
   output_shape: 1024x1024
   dtypes: [f32, bf16]
-  # gflops: (1024 * 32 * 1024 * 2) / 1e9
-  # pipeline: matmul
-  warning: "Optimized pipeline error: too many tiles provided, expected at most 3 found 4"
+  gflops: (1024 * 32 * 1024 * 2) / 1e9
 
 - kernel: level1/10_3D_tensor_matrix_multiplication.py
   input_shapes: [16x1024x2048, 2048x768]
@@ -88,8 +86,7 @@
   output_shape: 4096x4096
   dtypes: [f32, bf16]
   warning: '''ERROR: torch_mlir.compiler_utils.TorchMlirCompilerError:
-              Lowering TorchFX IR -> Torch Backend IR failed with the following diagnostics:
-              python exception: Failure while executing pass pipeline'''
+              error: failed to legalize operation torch.operator that was explicitly marked illegal'''
 
 - kernel: level1/13_Matmul_for_symmetric_matrices.py
   input_shapes: [4096x4096, 4096x4096]
@@ -107,7 +104,6 @@
   gflops: (4096 * 4096 * 4096 * 2) / 1e9
   pipeline: matmul
 
-
 - kernel: level1/15_Matmul_for_lower_triangular_matrices.py
   input_shapes: [4096x4096, 4096x4096]
   initializations: [rnd, rnd]
@@ -116,7 +112,6 @@
   gflops: (4096 * 4096 * 4096 * 2) / 1e9
   pipeline: matmul
 
-
 - kernel: level1/16_Matmul_with_transposed_A.py
   input_shapes: [8192x2048, 8192x4096]
   initializations: [rnd, rnd]
@@ -148,62 +143,56 @@
   dtypes: [f32, bf16]
   gflops: (4096 * 393216) / 1e9
 
-
 - kernel: level1/20_LeakyReLU.py
   input_shapes: [4096x393216]
   initializations: [rnd]
   output_shape: 4096x393216
   dtypes: [f32, bf16]
   gflops: (4096 * 393216) / 1e9
 
-
 - kernel: level1/21_Sigmoid.py
   input_shapes: [4096x393216]
   initializations: [rnd]
   output_shape: 4096x393216
   dtypes: [f32, bf16]
   gflops: (4096 * 393216) / 1e9
 
-
 - kernel: level1/22_Tanh.py
   input_shapes: [4096x393216]
   initializations: [rnd]
   output_shape: 4096x393216
   dtypes: [f32, bf16]
   gflops: (4096 * 393216) / 1e9
 
-
 - kernel: level1/23_Softmax.py
   input_shapes: [4096x393216]
   initializations: [rnd]
   output_shape: 4096x393216
   dtypes: [f32, bf16]
   gflops: (4096 * 393216) / 1e9
 
-
 - kernel: level1/24_LogSoftmax.py
   input_shapes: [4096x393216]
   initializations: [rnd]
   output_shape: 4096x393216
   dtypes: [f32, bf16]
   gflops: (4096 * 393216) / 1e9
 
-
 - kernel: level1/25_Swish.py
   input_shapes: [4096x393216]
   initializations: [rnd]
   output_shape: 4096x393216
   dtypes: [f32, bf16]
   gflops: (4096 * 393216) / 1e9
 
-
 - kernel: level1/26_GELU_.py
   input_shapes: [4096x393216]
   initializations: [rnd]
   output_shape: 4096x393216
   dtypes: [f32, bf16]
   gflops: (4096 * 393216) / 1e9
-
+  warning: '''error: cannot be converted to LLVM IR:
+              missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf'''
 
 - kernel: level1/27_SELU_.py
   input_shapes: [4096x393216]
@@ -212,111 +201,97 @@
   dtypes: [f32, bf16]
   gflops: (4096 * 393216) / 1e9
 
-
 - kernel: level1/28_HardSigmoid.py
   input_shapes: [4096x393216]
   initializations: [rnd]
   output_shape: 4096x393216
   dtypes: [f32, bf16]
   gflops: (4096 * 393216) / 1e9
 
-
 - kernel: level1/29_Softplus.py
   input_shapes: [4096x393216]
   initializations: [rnd]
   output_shape: 4096x393216
   dtypes: [f32, bf16]
   gflops: (4096 * 393216) / 1e9
 
-
 - kernel: level1/30_Softsign.py
   input_shapes: [4096x393216]
   initializations: [rnd]
   output_shape: 4096x393216
   dtypes: [f32, bf16]
   gflops: (4096 * 393216) / 1e9
 
-
 - kernel: level1/31_ELU.py
   input_shapes: [4096x393216]
   initializations: [rnd]
   output_shape: 4096x393216
   dtypes: [f32, bf16]
   gflops: (4096 * 393216) / 1e9
 
-
 - kernel: level1/32_HardTanh.py
   input_shapes: [4096x393216]
   initializations: [rnd]
   output_shape: 4096x393216
   dtypes: [f32, bf16]
   gflops: (4096 * 393216) / 1e9
 
-
 - kernel: level1/33_BatchNorm.py
   input_shapes: [64x64x512x512]
   initializations: [rnd]
   output_shape: 64x64x512x512
   dtypes: [f32, bf16]
   gflops: (64 * 64 * 512 * 512) / 1e9
 
-
 - kernel: level1/34_InstanceNorm.py
   input_shapes: [112x64x512x512]
   initializations: [rnd]
   output_shape: 112x64x512x512
   dtypes: [f32, bf16]
   gflops: (112 * 64 * 512 * 512) / 1e9
 
-
 - kernel: level1/35_GroupNorm_.py
   input_shapes: [112x64x512x512]
   initializations: [rnd]
   output_shape: 112x64x512x512
   dtypes: [f32, bf16]
   gflops: (112 * 64 * 512 * 512) / 1e9
 
-
 - kernel: level1/36_RMSNorm_.py
   input_shapes: [112x64x512x512]
   initializations: [rnd]
   output_shape: 112x64x512x512
   dtypes: [f32, bf16]
   gflops: (112 * 64 * 512 * 512) / 1e9
 
-
 - kernel: level1/37_FrobeniusNorm_.py
   input_shapes: [112x64x512x512]
   initializations: [rnd]
   output_shape: 112x64x512x512
   dtypes: [f32, bf16]
   gflops: (112 * 64 * 512 * 512) / 1e9
 
-
 - kernel: level1/38_L1Norm_.py
   input_shapes: [32768x65535]
   initializations: [rnd]
   output_shape: 32768x65535
   dtypes: [f32, bf16]
   gflops: (32768 * 65535) / 1e9
 
-
 - kernel: level1/39_L2Norm_.py
   input_shapes: [32768x65535]
   initializations: [rnd]
   output_shape: 32768x65535
   dtypes: [f32, bf16]
   gflops: (32768 * 65535) / 1e9
 
-
 - kernel: level1/40_LayerNorm.py
   input_shapes: [16x64x256x256]
   initializations: [rnd]
   output_shape: 16x64x256x256
   dtypes: [f32, bf16]
   gflops: (16 * 64 * 256 * 256) / 1e9
 
-
 - kernel: level1/41_Max_Pooling_1D.py
   input_shapes: [64x192x65536]
   initializations: [rnd]
@@ -624,34 +599,39 @@
   output_shape: 32768x32768
   dtypes: [f32, bf16]
   gflops: (32768 * 32768) / 1e9
+  warning: "error: Dialect `tm_tensor' not found for custom op 'tm_tensor.scan'"
 
 - kernel: level1/90_cumprod.py
   input_shapes: [32768x32768]
   initializations: [rnd]
   output_shape: 32768x32768
   dtypes: [f32, bf16]
   gflops: (32768 * 32768) / 1e9
+  warning: "error: Dialect `tm_tensor' not found for custom op 'tm_tensor.scan'"
 
 - kernel: level1/91_cumsum_reverse.py
   input_shapes: [32768x32768]
   initializations: [rnd]
   output_shape: 32768x32768
   dtypes: [f32, bf16]
   gflops: (32768 * 32768) / 1e9
+  warning: "error: Dialect `tm_tensor' not found for custom op 'tm_tensor.scan'"
 
 - kernel: level1/92_cumsum_exclusive.py
   input_shapes: [32768x32768]
   initializations: [rnd]
   output_shape: 32767x32769
   dtypes: [f32, bf16]
   gflops: (32768 * 32768) / 1e9
+  warning: "error: Dialect `tm_tensor' not found for custom op 'tm_tensor.scan'"
 
 - kernel: level1/93_masked_cumsum.py
   input_shapes: [32768x32768, 32768x32768]
   initializations: [rnd, rnd]
   output_shape: 32768x32768
   dtypes: [f32, bf16]
   gflops: (32768 * 32768) / 1e9
+  warning: "error: Dialect `tm_tensor' not found for custom op 'tm_tensor.scan'"
 
 - kernel: level1/94_MSELoss.py
   input_shapes: [32768x32768, 32768x32768]
@@ -666,13 +646,16 @@
   output_shape: "1"
   dtypes: [f32, bf16]
   gflops: (32768 * 4096) / 1e9
+  warning: "RuntimeError: gather(): Expected dtype int32/int64 for index, but got torch.float32"
 
 - kernel: level1/96_HuberLoss.py
   input_shapes: [32768x32768, 32768x32768]
   initializations: [rnd, rnd]
   output_shape: "1"
   dtypes: [f32, bf16]
   gflops: (32768 * 32768) / 1e9
+  warning: '''ERROR: torch_mlir.compiler_utils.TorchMlirCompilerError:
+              error: failed to legalize operation torch.operator that was explicitly marked illegal'''
 
 - kernel: level1/97_ScaledDotProductAttention.py
   input_shapes: [32x32x512x1024, 32x32x512x1024, 32x32x512x1024]
@@ -694,6 +677,8 @@
   output_shape: "1"
   dtypes: [f32, bf16]
   gflops: (32768 * 8192) / 1e9
+  warning: '''ERROR: torch_mlir.compiler_utils.TorchMlirCompilerError:
+              error: failed to legalize operation torch.operator that was explicitly marked illegal'''
 
 - kernel: level1/100_HingeLoss.py
   input_shapes: [32768x32768, 32768]

diff --git a/examples/end-to-end/KernelBench/level2.yaml b/examples/end-to-end/KernelBench/level2.yaml
@@ -15,6 +15,7 @@
   initializations: [rnd]
   output_shape: 32x64x16x32x32
   dtypes: [f32, bf16]
+  warning: "missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf"
 
 - kernel: level2/4_Conv2d_Mish_Mish.py
   input_shapes: [64x64x256x256]
@@ -39,6 +40,7 @@
   initializations: [rnd]
   output_shape: 64x32x30x62x62
   dtypes: [f32, bf16]
+  warning: "missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf"
 
 - kernel: level2/8_Conv3d_Divide_Max_GlobalAvgPool_BiasAdd_Sum.py
   input_shapes: [128x8x16x64x64]
@@ -112,6 +114,7 @@
   initializations: [rnd]
   output_shape: 128x64x258x258
   dtypes: [f32, bf16]
+  warning: "missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf"
 
 - kernel: level2/20_ConvTranspose3d_Sum_ResidualAdd_Multiply_ResidualAdd.py
   input_shapes: [16x32x16x32x32]
@@ -203,6 +206,7 @@
   initializations: [rnd]
   output_shape: 32x64x32x64x64
   dtypes: [f32, bf16]
+  warning: "missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf"
 
 - kernel: level2/35_Conv2d_Subtract_HardSwish_MaxPool_Mish.py
   input_shapes: [128x64x128x128]
@@ -215,6 +219,7 @@
   initializations: [rnd]
   output_shape: 16x1x1x256
   dtypes: [f32, bf16]
+  warning: "missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf"
 
 - kernel: level2/37_Matmul_Swish_Sum_GroupNorm.py
   input_shapes: [32768x1024]
@@ -245,18 +250,21 @@
   initializations: [rnd]
   output_shape: 16384x4096
   dtypes: [f32, bf16]
+  warning: "missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf"
 
 - kernel: level2/42_ConvTranspose2d_GlobalAvgPool_BiasAdd_LogSumExp_Sum_Multiply.py
   input_shapes: [16x64x512x512]
   initializations: [rnd]
   output_shape: 16x1
   dtypes: [f32, bf16]
+  warning: "error: failed to legalize operation torch.constant.bool"
 
 - kernel: level2/43_Conv3d_Max_LogSumExp_ReLU.py
   input_shapes: [4x32x32x128x128]
   initializations: [rnd]
   output_shape: 4x1x16x64x64
   dtypes: [f32, bf16]
+  warning: "error: failed to legalize operation torch.constant.bool"
 
 - kernel: level2/44_ConvTranspose2d_Multiply_GlobalAvgPool_GlobalAvgPool_Mean.py
   input_shapes: [16x64x128x128]
@@ -313,6 +321,8 @@
   initializations: [rnd]
   output_shape: 64x128x126x126
   dtypes: [f32, bf16]
+  warning: '''ERROR: torch_mlir.compiler_utils.TorchMlirCompilerError:
+              error: failed to legalize operation torch.operator that was explicitly marked illegal'''
 
 - kernel: level2/53_Gemm_Scaling_Hardtanh_GELU.py
   input_shapes: [2048x8192]
@@ -325,6 +335,7 @@
   initializations: [rnd]
   output_shape: 64x64x254x254
   dtypes: [f32, bf16]
+  warning: "missing `LLVMTranslationDialectInterface` registration for dialect for op: math.erf"
 
 - kernel: level2/55_Matmul_MaxPool_Sum_Scale.py
   input_shapes: [128x32768]
@@ -351,6 +362,7 @@
   initializations: [rnd]
   output_shape: 128x1x31x63x63
   dtypes: [f32, bf16]
+  warning: "ERROR: torch_mlir.compiler_utils.TorchMlirCompilerError: failed to legalize operation torch.constant.bool"
 
 - kernel: level2/59_Matmul_Swish_Scaling.py
   input_shapes: [128x32768]