diff --git a/examples/end-to-end/KernelBench/test_kernel_bench.py b/examples/end-to-end/KernelBench/test_kernel_bench.py
index ec8b278..a432643 100755
--- a/examples/end-to-end/KernelBench/test_kernel_bench.py
+++ b/examples/end-to-end/KernelBench/test_kernel_bench.py
@@ -126,6 +126,11 @@ def get_flops_per_second(stdout: str, gflops: float) -> float:
         type=str,
         help="Specify a particular test to run.",
     )
+    Parser.add_argument(
+        "--print-output",
+        action=argparse.BooleanOptionalAction,
+        help="Whether to print the output of the kernel. Default is False.",
+    )
     Parser.add_argument(
         "--print-mlir-after-all",
         action=argparse.BooleanOptionalAction,
@@ -180,7 +185,7 @@ def get_flops_per_second(stdout: str, gflops: float) -> float:
             ]
 
-        # Smoke tests / CI don't print outputs.
+        # Only print kernel outputs when explicitly requested via --print-output.
-        if not args.smoke_test and not args.ci:
+        if args.print_output:
             command_line += ["--print-output"]
 
         # For debugging, prefer not to capture output.
diff --git a/tools/kernel_bench b/tools/kernel_bench
index 33ef99e..24a52ec 100755
--- a/tools/kernel_bench
+++ b/tools/kernel_bench
@@ -17,6 +17,11 @@ from lighthouse.schedule import convert_function_results
 from lighthouse import dialects as lh_dialects
 from lighthouse import ingress as lh_ingress
 from lighthouse.ingress.torch import cpu_backend
+from lighthouse.utils.mlir import get_mlir_library_path
+import os
+
+lib_dir = get_mlir_library_path()
+c_runner_lib = os.path.join(lib_dir, "libmlir_c_runner_utils.so")
 
 
 def import_torch(
@@ -168,7 +173,10 @@ def torch_compile(args, buffers: list, sample_tensors: list):
         )
     else:
         # Reconfigure the model to be compiled using torch.compile, take the compiled output.
-        model.compile(dynamic=False, backend=cpu_backend(compiler_pipeline))
+        model.compile(
+            dynamic=False,
+            backend=cpu_backend(compiler_pipeline, shared_libs=[c_runner_lib]),
+        )
         out = model(*sample_tensors, **sample_kwargs)
 
     return out