xnvme · yonggilsong · Apr 10, 2026 · Apr 10, 2026 · Apr 10, 2026 · Apr 17, 2026
diff --git a/auxiliary/cpu_freq_logger.sh b/auxiliary/cpu_freq_logger.sh
@@ -12,6 +12,20 @@ fi
 
 rm -f $OUTFILE
 
+HAS_FREQ_FILE=0  # becomes 1 once any CPU exposes a current-frequency file
+for cpu in /sys/devices/system/cpu/cpu*/cpufreq; do  # visit each CPU's cpufreq directory
+    if [[ -f "$cpu/cpuinfo_cur_freq" || -f "$cpu/scaling_cur_freq" ]]; then
+        HAS_FREQ_FILE=1
+        break  # one existing frequency file is enough to enable logging
+    fi
+done
+
+if [[ $HAS_FREQ_FILE -eq 0 ]]; then
+    echo "UNSUPPORTED" > "$OUTFILE"  # leave a marker in the output file instead of frequency samples
+    echo "CPU frequency logging unsupported on this platform"
+    exit 0  # exit with status 0; the background logging loop below is never started
+fi
+
 nohup bash -c "
 while true; do
     TS=\$(date +\"%Y-%m-%d_%H:%M:%S\")

diff --git a/configs/bench_fio_compare.toml b/configs/bench_fio_compare.toml
@@ -0,0 +1,3 @@
+[fio_compare]
+fio_size = "16GiB"
+output_root = "cijoe-output/artifacts/fio-compare"
diff --git a/configs/devices_16.toml b/configs/devices_16.toml
@@ -2,7 +2,7 @@
 fail_fast=true
 
 [xnvme.driver]
-prefix = "PCI_BLACKLIST=0000:01:00.0"
+prefix = "PCI_BLACKLIST=\"0000:01:00.0\""
 
 [[devices]]
 pci_addr = "0000:4a:00.0"

diff --git a/scripts/bench_helper.py b/scripts/bench_helper.py
@@ -32,6 +32,7 @@
 
 from bdevperf import bdevperf_cmd, create_config as bdevperf_config
 from dcgm_helper import DcgmHelper
+from fio_xnvme import fio_xnvme_cmd, fio_xnvme_prefill_cmd
 from spdk_nvme_perf import spdk_nvme_perf_cmd
 from xnvmeperf import xnvmeperf_cmd, xnvmeperf_cuda_cmd
 
@@ -45,6 +46,7 @@ def __init__(
             cfm: CpuFrequencyHelper,
             tool: str = "bdevperf",
             backend: str = "spdk",
+            fio_size: str = "16GiB",
     ):
         self.initialised = False
 
@@ -55,7 +57,7 @@ def __init__(
         self.stress = False
         self.backend = backend
         self.tool = tool
-
+        self.fio_size = fio_size
         self.dcgm = DcgmHelper(cijoe) if backend == "upcie-cuda" else None
 
         self.use_thrsib = False
@@ -79,8 +81,11 @@ def __init__(
                 self.bin = Path(spdk_path)  / "build" / "bin" / "spdk_nvme_perf"
         elif tool in ["xnvmeperf", "xnvmeperf-cuda"]:
             self.bin = "xnvmeperf"
+        elif tool == "fio_xnvme":
+            self.bin = "fio"
         else:
             log.error(f"Failed: Unknown tool({tool})")
+            return
 
         self.initialised = True
 
@@ -98,10 +103,9 @@ def use_thread_siblings(self, use_thrsib: bool) -> int:
             return err
 
         self.use_thrsib = use_thrsib
-
         return 0
 
-    def run_benchmark(self, depth: int, size: int, ndevs: int, ncpus: int, time: int, cpu_freq: float, suffix: str = "", nqueues: int = 1):
+    def run_benchmark(self, rw: str, depth: int, size: int, ndevs: int, ncpus: int, time: int, cpu_freq: float, suffix: str = "", nqueues: int = 1, prefill_only: int = 0):
         if not self.initialised:
             log.error("Failed: benchmarker not initialised correctly")
             return 1, None
@@ -114,10 +118,18 @@ def run_benchmark(self, depth: int, size: int, ndevs: int, ncpus: int, time: int
                 f"be_{self.backend}-tool_{self.tool}"
                 f"{suffix}.out"
             )
+        elif self.tool == "fio_xnvme":
+            filename = (
+                f"d{ndevs}-c{ncpus}-o{size}-f_{self.fio_size}-"
+                f"rw_{rw}-q{depth}-be_{self.backend}-tool_{self.tool}-"
+                f"thrsib{1 if self.use_thrsib else 0}-"
+                f"freq_{cpu_freq}-"
+                f"stress{1 if self.stress else 0}"
+                f"{suffix}.out"
+            )
         else:
             filename = (
-                f"d{ndevs}-c{ncpus}-o{size}-q{depth}-"
-                f"be_{self.backend}-tool_{self.tool}-"
+                f"d{ndevs}-c{ncpus}-o{size}-q{depth}-be_{self.backend}-tool_{self.tool}-"
                 f"thrsib{1 if self.use_thrsib else 0}-"
                 f"freq_{cpu_freq}-"
                 f"stress{1 if self.stress else 0}"
@@ -131,17 +143,35 @@ def run_benchmark(self, depth: int, size: int, ndevs: int, ncpus: int, time: int
                 return 0, result
 
         bench_args = {
-            "iopattern": "randread",
+            "iopattern": rw,
             "qdepth": depth,
             "iosize": size,
             "runtime": time,
             "devices": [d["pci_addr"] for d in self.devices[0:ndevs]],
         }
 
+        if self.tool == "fio_xnvme":
+            bench_args["backend"] = self.backend
+            bench_args["rw"] = rw
+            bench_args["fio_size"] = self.fio_size
+
         if not is_cuda:
             bench_args["cpumask"] = self.cpu_masks[ncpus]
 
-        command = f"/usr/bin/time "
+        if self.tool == "fio_xnvme" and ndevs != 1:
+            log.error("Failed: fio_xnvme currently supports exactly 1 device per benchmark point")
+            return 1, None
+
+        if is_cuda:
+            selected_cpus = []
+            command = f"/usr/bin/time "
+        else:
+            selected_cpus = [v[0] for v in self.cpu_pairs if int(bench_args["cpumask"], 16) & (1 << v[0])]
+            command = f"/usr/bin/time "
+
+        if self.tool == "fio_xnvme":
+            cpu_list = ",".join(str(cpu) for cpu in selected_cpus)
+            command = f"taskset -c {cpu_list} {command}"
 
         if self.tool == "bdevperf":
             config_local_path = self.configs_path / f"d{ndevs}.json"
@@ -150,10 +180,8 @@ def run_benchmark(self, depth: int, size: int, ndevs: int, ncpus: int, time: int
 
             bench_args["config_path"] = self.remote_config
             command += bdevperf_cmd(self.bin, bench_args)
-
         elif self.tool == "spdk_nvme_perf":
             command += spdk_nvme_perf_cmd(self.bin, bench_args)
-
         elif self.tool == "xnvmeperf":
             bench_args["backend"] = self.backend
             command += xnvmeperf_cmd(self.bin, bench_args)
@@ -163,15 +191,15 @@ def run_benchmark(self, depth: int, size: int, ndevs: int, ncpus: int, time: int
             bench_args["nqueues"] = nqueues
             command += xnvmeperf_cuda_cmd(self.bin, bench_args)
 
+        elif self.tool == "fio_xnvme":
+            if prefill_only:
+                command += fio_xnvme_prefill_cmd(self.bin, bench_args)
+            else:
+                command += fio_xnvme_cmd(self.bin, bench_args)
         else:
             log.error(f"Unknown tool: {self.tool}")
             return -1, None
 
-        if is_cuda:
-            selected_cpus = []
-        else:
-            selected_cpus = [v[0] for v in self.cpu_pairs if int(bench_args["cpumask"], 16) & (1 << v[0])]
-
         if self.stress and (stressed_cpus := [str(x) for x in range(len(self.cpu_pairs)) if x not in selected_cpus]):
             command = "\n".join([
                 f"taskset -c {','.join(stressed_cpus)} stress-ng --cpu {len(stressed_cpus)} --timeout {time + 5}s &",
@@ -231,22 +259,29 @@ def abort_monitors():
                 log.error("Failed: DcgmHelper.stop_and_parse()")
                 return err, None
 
-        cpu_freqs = [[cpu_freqs[idx], self.cpu_pairs[idx]] for idx in selected_cpus]
+        if self.cfm.cpu_control_supported and cpu_freqs:
+            cpu_freqs = [[cpu_freqs[idx], self.cpu_pairs[idx]] for idx in selected_cpus]
+        else:
+            cpu_freqs = []
 
         result = {
+            "rw": rw,
             "qdepth": depth,
             "iosize": size,
+            "fio_size": self.fio_size,
             "ndevs": ndevs,
             "ncpus": ncpus,
             "nqueues": nqueues,
+            "device_bdf": bench_args["devices"][0] if bench_args["devices"] else None,
             "cpu_usage": cpu_usage,
             "cpu_freqs": cpu_freqs,
             "fixed_freq": self.cfm.fixed_freq,
             "cpu_governor": self.cfm.governor,
-            "thr_sib": 1 if self.use_thrsib else 0,
+            "cpu_control_supported": 1 if self.cfm.cpu_control_supported else 0,
+            "thr_sib": self.use_thrsib,
             "smt": 1 if "SMT1" in suffix else 0,
             "turbo": 1 if "turbo1" in suffix else 0,
-            "stress": 1 if self.stress else 0,
+            "stress": self.stress,
             "tool": self.tool,
             "backend": self.backend,
             "iops": bench_result["total"]["iops"],
@@ -271,7 +306,6 @@ def _create_cpumasks(self, use_thrsib: bool):
             `(err, cpu_masks)` where a non-zero value for `err` describes that an error
             occured either while running `lscpu` or while parsing the output.
         """
-
         err, state = self.cijoe.run("lscpu -e")
         if err:
             log.error(f"Failed: lscpu -e")
@@ -284,7 +318,6 @@ def _create_cpumasks(self, use_thrsib: bool):
             return 1,
 
         cpu_pairs = [[int(v) for v in match.groupdict().values()] for match in matches]
-
         if use_thrsib:
             pairs = sorted(cpu_pairs, key=lambda p: p[1])
         else:
@@ -305,7 +338,6 @@ def _create_cpumasks(self, use_thrsib: bool):
 
         self.cpu_masks = cpu_masks
         self.cpu_pairs = cpu_pairs
-
         return 0
 
     def _parse_bench_results(self, table: str) -> Tuple[int, dict[str, list]]:
@@ -316,7 +348,6 @@ def _parse_bench_results(self, table: str) -> Tuple[int, dict[str, list]]:
         Returns `(err, result)`, where a non-zero value for `err` describes that the
         output did not match the expected format.
         """
-
         result = { "devices": [] }
         table_regex = None
 
@@ -326,6 +357,8 @@ def _parse_bench_results(self, table: str) -> Tuple[int, dict[str, list]]:
             table_regex = r"\s*(?P<name>.+?)\s*?:\s+(?P<iops>[0-9.]+)\s+(?P<mibs>[0-9.]+)\s+(?P<avg_lat>[0-9.]+)\s+(?P<min_lat>[0-9.]+)\s+(?P<max_lat>[0-9.]+)"
         elif self.tool in ["xnvmeperf", "xnvmeperf-cuda"]:
             table_regex = r"\s*(?P<name>\w+):?\s+(?P<cpus>[0-9,]+)?\s+(?P<iops>[0-9.]+)\s+(?P<mibs>[0-9.]+)\s+(?P<fails>[0-9.]+)"
+        elif self.tool == "fio_xnvme":
+            return self._parse_fio_results(table)
         else:
             log.error(f"Unkown tool: {self.tool}")
             return -1, None
@@ -341,14 +374,54 @@ def _parse_bench_results(self, table: str) -> Tuple[int, dict[str, list]]:
 
         return 0, result
 
+    def _parse_fio_results(self, output: str) -> Tuple[int, dict[str, list]]:
+        """
+        Parse the JSON document emitted by fio into per-job and total IOPS and MiB/s.
+
+        Read, write and trim figures are summed per job so that mixed workloads
+        (e.g. `randrw`) are not reported as read-only.
+
+        Returns `(err, result)`, where a non-zero value for `err` describes that the
+        output did not contain a usable fio JSON document.
+        """
+        # The JSON may be surrounded by other text in the captured output, so
+        # take everything between the first "{" and the last "}".
+        start, end = output.find("{"), output.rfind("}")
+        if start < 0 or end <= start:
+            log.error("Failed: could not find fio JSON output")
+            return 1, None
+
+        try:
+            payload = json.loads(output[start:end + 1])
+        except json.JSONDecodeError:
+            log.error("Failed: invalid fio JSON output")
+            return 1, None
+
+        jobs = payload.get("jobs")
+        if not jobs:
+            log.error("Failed: fio returned no jobs")
+            return 1, None
+
+        devices = []
+        for job in jobs:
+            # Unused directions contribute 0, so pure read/write runs are unchanged
+            directions = [job.get(name) or {} for name in ("read", "write", "trim")]
+            iops = sum(float(d.get("iops") or 0.0) for d in directions)
+            mibs = sum(float(d.get("bw_bytes") or 0.0) for d in directions) / (1024 * 1024)
+            devices.append({"iops": iops, "mibs": mibs})
+
+        return 0, {
+            "devices": devices,
+            "total": {key: sum(dev[key] for dev in devices) for key in ("iops", "mibs")},
+        }
+
     def _parse_time_output(self, output: str) -> Tuple[int, int]:
         """
         Find the CPU usage from /usr/bin/time in from the output of /usr/bin/time.
 
         Returns `(err, cpu_usage)`, where a non-zero value for `err` describes that the
         output did not match the expected format.
         """
-
         time_regex = r"(?P<user>[0-9.]+)user (?P<system>[0-9.]+)system (?P<elapsed>[0-9.:]+)elapsed (?P<cpu>[0-9.]+)%CPU .*k"
         m = search(time_regex, output)
 

diff --git a/scripts/bench_runall.py b/scripts/bench_runall.py
@@ -35,11 +35,14 @@ def add_args(parser: ArgumentParser):
     parser.add_argument("--smt", type=int, default=[0,1], nargs="+", help="0 for SMT off, 1 for SMT on, [0,1] for testing both")
     parser.add_argument("--hyperthreads", type=int, default=[0,1], nargs="+", help="0 for hyper threads off, 1 for hyper threads on, [0,1] for testing both. Note that you cannot test with hyper threads if SMT is turned off")
     parser.add_argument("--stress", type=int, default=[0,1], nargs="+", help="0 for not stressing unused CPUs, 1 for stressing unused CPUs, [0,1] for testing both")
-    parser.add_argument("--time", type=int, default=10, help="Time for for bdevperf to run for each test")
+    parser.add_argument("--time", type=int, default=5, help="Time for each benchmark run")
+    parser.add_argument("--fio_size", type=str, default="16GiB", help="Working-set size for fio_xnvme")
     parser.add_argument("--results_dir", type=Path, default=None, help="Path to existing directory in which the results should be saved. Note: Already existing results will not be benchmarked again")
     parser.add_argument("--repetitions", type=int, default=5, help="The amount of times each benchmark will be repeated. The result will be average of the repetitions")
     parser.add_argument("--nqueues", type=int, default=[1], nargs="+", help="Number of queues per device (used by xnvmeperf-cuda)")
-    parser.add_argument("--tool", choices=["bdevperf", "xnvmeperf", "spdk_nvme_perf", "xnvmeperf-cuda"], default="xnvmeperf")
+    parser.add_argument("--rws", type=str, default=["randread"], nargs="+", help="List of I/O patterns to test")
+    parser.add_argument("--prefill_only", type=int, default=0, help="Run fio_xnvme in prefill-only mode")
+    parser.add_argument("--tool", choices=["bdevperf", "xnvmeperf", "spdk_nvme_perf", "xnvmeperf-cuda", "fio_xnvme"], default="xnvmeperf")
     parser.add_argument("--backend", type=str, default="upcie")
 
 
@@ -75,7 +78,7 @@ def main(args, cijoe: Cijoe):
         log.error("Failed: transfer_cpu_frequency_logger()")
         return err
 
-    benchmarker = BenchHelper(cijoe, bdev_configs, bdev_results, cfm, args.tool, args.backend)
+    benchmarker = BenchHelper(cijoe, bdev_configs, bdev_results, cfm, args.tool, args.backend, args.fio_size)
     if not benchmarker.initialised:
         log.error("Failed: could not initialise BenchHelper")
         return 1
@@ -95,7 +98,7 @@ def main(args, cijoe: Cijoe):
             now = time()
 
             for i in range(args.repetitions):
-                err, result = benchmarker.run_benchmark(qd, iosz, devs, 0, args.time, "N/A", f"-{i}", nq)
+                err, result = benchmarker.run_benchmark("randread", qd, iosz, devs, 0, args.time, "N/A", f"-{i}", nq, args.prefill_only)
                 if err:
                     log.error("Failed: run_benchmark()")
                     return err
@@ -124,17 +127,17 @@ def main(args, cijoe: Cijoe):
     tests = []
 
     if 0 in args.hyperthreads:
-        tests += product([0], args.turbo, args.smt, args.stress, args.cpu_freqs, test_devs, test_cpus, args.sizes, args.depths)
+        tests += product([0], args.turbo, args.smt, args.stress, args.cpu_freqs, test_devs, test_cpus, args.rws, args.sizes, args.depths)
     if 1 in args.hyperthreads:
         # shift range to match cpu hyperthreads
         test_cpus = [x for cpu in test_cpus for x in [cpu*2-1, cpu*2]]
-        tests += product([1], args.turbo, args.smt, args.stress, args.cpu_freqs, test_devs, test_cpus, args.sizes, args.depths)
+        tests += product([1], args.turbo, args.smt, args.stress, args.cpu_freqs, test_devs, test_cpus, args.rws, args.sizes, args.depths)
 
-    tests = [(ht,tu,sm,st,f,d,c,o,q) for (ht,tu,sm,st,f,d,c,o,q) in tests if not (not sm and ht)]
+    tests = [(ht,tu,sm,st,f,d,c,rw,o,q) for (ht,tu,sm,st,f,d,c,rw,o,q) in tests if not (not sm and ht)]
 
     finished, total, now = 0, len(tests), time()
 
-    for ht, tu, sm, st, freq, devs, cpus, iosz, qd in tests:
+    for ht, tu, sm, st, freq, devs, cpus, rw, iosz, qd in tests:
         err = cfm.toggle_smt(sm)
         if err:
             log.error(f"Failed: cfm.toggle_smt({sm})")
@@ -160,7 +163,7 @@ def main(args, cijoe: Cijoe):
         now = time()
 
         for i in range(args.repetitions):
-            err, result = benchmarker.run_benchmark(qd, iosz, devs, cpus, args.time, freq, f"{suffix}-{i}")
+            err, result = benchmarker.run_benchmark(rw, qd, iosz, devs, cpus, args.time, freq, f"{suffix}-{i}", prefill_only=args.prefill_only)
             if err:
                 log.error("Failed: run_benchmark()")
                 return err