From f3e9d8fbcd905b968b681999c9a5fed03c5c6640 Mon Sep 17 00:00:00 2001 From: Jana Hozzova Date: Wed, 4 Mar 2026 12:17:00 +0100 Subject: [PATCH 1/3] Add compilation data to de/serialization in T4 format --- ...oulomb_rtx500ada_full_search_space.t4.json | 16352 ++++++++++------ Source/Output/JsonT4Converters.cpp | 69 +- .../03KernelTuning/FullSearchSpace.t4.json | 262 +- 3 files changed, 10194 insertions(+), 6489 deletions(-) diff --git a/Examples/CoulombSum3d/coulomb_rtx500ada_full_search_space.t4.json b/Examples/CoulombSum3d/coulomb_rtx500ada_full_search_space.t4.json index 3f49061c..390dfc84 100644 --- a/Examples/CoulombSum3d/coulomb_rtx500ada_full_search_space.t4.json +++ b/Examples/CoulombSum3d/coulomb_rtx500ada_full_search_space.t4.json @@ -5,11 +5,28 @@ "compute_api": "CUDA", "device": "NVIDIA RTX 500 Ada Generation Laptop GPU", "platform": "NVIDIA CUDA", - "timestamp": "2026-01-27 09:27:37 UTC", + "timestamp": "2026-03-02 14:29:51 UTC", "timeunit": "microseconds" }, "results": [ { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 256 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 44 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -26,61 +43,61 @@ { "name": "time", "unit": "", - "value": 4651.232 + "value": 4624.704 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 21.999069148936172 + "value": 10.411948636927773 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2102324.0 + "value": 232.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868068.0 + "value": 1868228.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.167714690042399 + "value": 1.6188741888304559 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2158716.0 + "value": 64674.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100115.0 + "value": 2099214.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.719930504269065 + "value": 22.494926321081408 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0482729233324062 + "value": 1.0542418047027353 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -110,13 +127,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 63.0898831843032 + "value": 60.42613609541247 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95217016189612 + "value": 99.95692785096995 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -128,7 +145,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8606711808.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -152,7 +169,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -170,13 +187,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 342360064.0 + "value": 341311488.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.853397078956995 + "value": 39.990894664469515 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -188,13 +205,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.74771415475139 + "value": 45.000366292150126 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.060907330772839 + "value": 11.953222296352378 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -206,7 +223,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.07150258663136 + "value": 57.21795747517453 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -225,21 +242,38 @@ "time" ], "times": { - "compilation": 15894.075, - "data": 63084.019, - "framework": 274855.673, - "kernel_overhead": 61015.864, - "profiling_overhead": 53355.97, - "profiling_runs": 97399.82, + "compilation": 38667.992, + "data": 65390.136, + "framework": 276837.773, + "kernel_overhead": 60081.448, + "profiling_overhead": 54665.666, + "profiling_runs": 96700.523, "runtimes": [ - 4651.232 + 4624.704 ], - "search_algorithm": 21.935, - "validation": 18.527 + "search_algorithm": 23.173, + "validation": 16.077 }, - "timestamp": "2026-01-27 09:24:55 UTC" + "timestamp": "2026-03-02 14:27:8 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 256 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 44 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -256,61 +290,61 @@ { "name": "time", "unit": "", - "value": 3832.736 + "value": 4111.36 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.653704339834757 + "value": 11.747035144386057 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097640.0 + "value": 828.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869556.0 + "value": 1870548.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.2557753347641527 + "value": 1.6464863800751055 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2152522.0 + "value": 64999.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099338.0 + "value": 2099246.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.724279323889956 + "value": 22.508424333784806 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0484961465334799 + "value": 1.0546425853891812 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -340,13 +374,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 71.17747964784758 + "value": 71.46997356111453 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96101380252213 + "value": 99.94899829123742 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -358,7 +392,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8606711808.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -382,7 +416,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -400,13 +434,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 342360064.0 + "value": 341311488.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.85674782392271 + "value": 40.00891329141059 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -418,13 +452,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.75328318879029 + "value": 45.02104514559877 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.062408359478633 + "value": 11.958715116799674 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -436,7 +470,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.07860953396677 + "value": 57.24425298156683 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -455,21 +489,38 @@ "time" ], "times": { - "compilation": 14591.79, - "data": 63808.032, - "framework": 252762.744, - "kernel_overhead": 52546.761, - "profiling_overhead": 53544.089, - "profiling_runs": 82863.862, + "compilation": 14047.967, + "data": 66698.358, + "framework": 266112.539, + "kernel_overhead": 54507.466, + "profiling_overhead": 57011.748, + "profiling_runs": 87894.967, "runtimes": [ - 3832.736 + 4111.36 ], - "search_algorithm": 15.249, - "validation": 13.217 + "search_algorithm": 34.088, + "validation": 17.611 }, - "timestamp": "2026-01-27 09:24:55 UTC" + "timestamp": "2026-03-02 14:27:8 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 256 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 44 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -486,61 +537,61 @@ { "name": "time", "unit": "", - "value": 4195.2 + "value": 3825.376 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 26.933837240860065 + "value": 12.980192345577304 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2099248.0 + "value": 8540.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838660.0 + "value": 1846884.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.28597843151674 + "value": 1.6980193787316176 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2161221.0 + "value": 72050.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099596.0 + "value": 2135034.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.804108847162897 + "value": 22.559228035495185 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0521705512978097 + "value": 1.0571974309534597 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -570,13 +621,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 63.229384176356774 + "value": 60.38176541052634 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9563813179575 + "value": 99.95697546885552 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -588,7 +639,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8606711808.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -612,7 +663,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -630,13 +681,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 342360064.0 + "value": 341311488.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.99950810570688 + "value": 40.10313416890375 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -648,13 +699,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.91220029855307 + "value": 45.12650585491358 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.10524148671938 + "value": 11.98672811771142 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -666,7 +717,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.28128886477305 + "value": 57.378335836811935 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -685,21 +736,38 @@ "time" ], "times": { - "compilation": 13392.944, - "data": 61640.938, - "framework": 249183.528, - "kernel_overhead": 52751.502, - "profiling_overhead": 51337.493, - "profiling_runs": 83453.595, + "compilation": 13892.782, + "data": 66846.285, + "framework": 254133.20500000002, + "kernel_overhead": 50680.543, + "profiling_overhead": 55639.911, + "profiling_runs": 80966.466, "runtimes": [ - 4195.2 + 3825.376 ], - "search_algorithm": 20.661, - "validation": 15.155 + "search_algorithm": 23.45, + "validation": 13.532 }, - "timestamp": "2026-01-27 09:24:55 UTC" + "timestamp": "2026-03-02 14:27:9 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 256 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 44 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -716,61 +784,61 @@ { "name": "time", "unit": "", - "value": 3863.712 + "value": 3746.176 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.469185280182394 + "value": 13.059047490312697 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2107232.0 + "value": 12600.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840736.0 + "value": 1842328.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.2911661880046226 + "value": 1.687278699365753 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2169574.0 + "value": 75401.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105187.0 + "value": 2105665.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.813756531737464 + "value": 22.571437825600857 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0526402580716179 + "value": 1.0577408875914127 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -800,13 +868,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 70.89450355115572 + "value": 70.6814225076394 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97109238732202 + "value": 99.9560947457864 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -818,7 +886,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8606711808.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -842,7 +910,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -860,13 +928,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 342360064.0 + "value": 341311488.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.012024154563306 + "value": 40.124476280278614 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -878,13 +946,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.92563794046464 + "value": 45.150101137248946 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.108863351140862 + "value": 11.992995614581751 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -896,7 +964,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.29843116293686 + "value": 57.408348162568586 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -915,21 +983,38 @@ "time" ], "times": { - "compilation": 12955.431, - "data": 63538.764, - "framework": 247475.775, - "kernel_overhead": 50322.479, - "profiling_overhead": 52939.861, - "profiling_runs": 80674.671, + "compilation": 14499.703, + "data": 69820.855, + "framework": 258114.99899999998, + "kernel_overhead": 50014.144, + "profiling_overhead": 58158.457, + "profiling_runs": 80121.543, "runtimes": [ - 3863.712 + 3746.176 ], - "search_algorithm": 23.26, - "validation": 15.311 + "search_algorithm": 23.333, + "validation": 17.556 }, - "timestamp": "2026-01-27 09:24:56 UTC" + "timestamp": "2026-03-02 14:27:9 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 256 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 44 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -946,61 +1031,61 @@ { "name": "time", "unit": "", - "value": 3792.416 + "value": 3683.744 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 26.870563365578676 + "value": 13.150522171212039 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2108580.0 + "value": 8948.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1845116.0 + "value": 1838976.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.300943957180106 + "value": 1.6826861930656676 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2171062.0 + "value": 69034.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102819.0 + "value": 2100456.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.801248795295464 + "value": 22.5516869656456 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0519954621566185 + "value": 1.0565964972188082 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -1030,13 +1115,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.03157707187836 + "value": 72.23711013846311 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95339735360602 + "value": 99.94477798296198 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -1048,7 +1133,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8606711808.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -1072,7 +1157,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -1090,13 +1175,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 342360064.0 + "value": 341311488.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.99393992034953 + "value": 40.08458039205776 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -1108,13 +1193,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.90606713435846 + "value": 45.10635918935927 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.103588407307553 + "value": 11.981376659673556 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -1126,7 +1211,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.27344651590261 + "value": 57.35271112518268 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -1145,21 +1230,38 @@ "time" ], "times": { - "compilation": 12710.457, - "data": 63425.718, - "framework": 251182.77000000002, - "kernel_overhead": 52215.42, - "profiling_overhead": 52969.994, - "profiling_runs": 82571.638, + "compilation": 14510.8, + "data": 64151.459, + "framework": 250951.25100000002, + "kernel_overhead": 52053.758, + "profiling_overhead": 52741.051, + "profiling_runs": 82004.983, "runtimes": [ - 3792.416 + 3683.744 ], - "search_algorithm": 28.097, - "validation": 13.513 + "search_algorithm": 23.625, + "validation": 13.158 }, - "timestamp": "2026-01-27 09:24:56 UTC" + "timestamp": "2026-03-02 14:27:9 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -1176,61 +1278,61 @@ { "name": "time", "unit": "", - "value": 2103.232 + "value": 2084.736 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.9894403691434 + "value": 23.641546139908108 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104528.0 + "value": 488.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870436.0 + "value": 1870016.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.716705659760554 + "value": 2.9409661601340997 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2125531.0 + "value": 36048.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104498.0 + "value": 2099100.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.445196457654134 + "value": 19.91120337137657 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9288396081155547 + "value": 0.9328369539472485 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -1260,13 +1362,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.36806905508905 + "value": 96.16141159052735 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95109735619351 + "value": 99.93681929633743 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -1278,7 +1380,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6459228160.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -1302,7 +1404,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -1320,13 +1422,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 257949696.0 + "value": 256901120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.41447144551186 + "value": 50.482728224246095 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -1338,13 +1440,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.29975957148999 + "value": 79.65241169102248 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.996646346827713 + "value": 10.73440704429795 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -1356,7 +1458,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.20309939807616 + "value": 76.23083997925477 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -1375,21 +1477,38 @@ "time" ], "times": { - "compilation": 12696.228, - "data": 64060.867, - "framework": 259293.284, - "kernel_overhead": 59562.187, - "profiling_overhead": 53886.224, - "profiling_runs": 81784.006, + "compilation": 52783.613, + "data": 66038.952, + "framework": 263544.398, + "kernel_overhead": 60380.561, + "profiling_overhead": 54869.582, + "profiling_runs": 82255.303, "runtimes": [ - 2103.232 + 2084.736 ], - "search_algorithm": 20.282, - "validation": 10.726 + "search_algorithm": 24.112, + "validation": 15.49 }, - "timestamp": "2026-01-27 09:24:56 UTC" + "timestamp": "2026-03-02 14:27:9 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -1406,61 +1525,61 @@ { "name": "time", "unit": "", - "value": 2107.776 + "value": 2124.64 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.82462202259211 + "value": 23.462335694639886 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2101224.0 + "value": 6816.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871116.0 + "value": 1871252.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.691820956355132 + "value": 2.9374073118834225 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2121053.0 + "value": 42415.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099430.0 + "value": 2100271.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.43997317090961 + "value": 19.911763669148296 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9286236741791437 + "value": 0.9328470453106084 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -1490,13 +1609,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.92937884280917 + "value": 94.35967604961488 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95518132505144 + "value": 99.93735268875814 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -1508,7 +1627,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6459228160.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -1532,7 +1651,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -1550,13 +1669,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 257949696.0 + "value": 256901120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.400247107039256 + "value": 50.48388025347601 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -1568,13 +1687,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.27808491713806 + "value": 79.65284823425156 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.993640681868753 + "value": 10.734465875319058 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -1586,7 +1705,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.18232725366094 + "value": 76.23131236852525 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -1605,21 +1724,38 @@ "time" ], "times": { - "compilation": 13735.602, - "data": 64822.879, - "framework": 263696.87, - "kernel_overhead": 60967.435, - "profiling_overhead": 54726.219, - "profiling_runs": 83180.337, + "compilation": 14412.918, + "data": 66523.767, + "framework": 264283.49100000004, + "kernel_overhead": 60050.037, + "profiling_overhead": 55476.802, + "profiling_runs": 82232.885, "runtimes": [ - 2107.776 + 2124.64 ], - "search_algorithm": 31.496, - "validation": 17.017 + "search_algorithm": 22.515, + "validation": 18.746 }, - "timestamp": "2026-01-27 09:24:56 UTC" + "timestamp": "2026-03-02 14:27:9 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -1636,61 +1772,61 @@ { "name": "time", "unit": "", - "value": 2172.736 + "value": 2127.392 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.7781876722745 + "value": 22.94248148089273 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2102452.0 + "value": 6820.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838888.0 + "value": 1840560.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.756236458540647 + "value": 2.9203920145111426 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2138777.0 + "value": 42596.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099865.0 + "value": 2100374.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.457812185325896 + "value": 19.907779410784542 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9295669406114107 + "value": 0.9327720734660485 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -1720,13 +1856,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.40942179179784 + "value": 95.8063099416199 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94859812730616 + "value": 99.93537103793192 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -1738,7 +1874,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6459228160.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -1762,7 +1898,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -1780,13 +1916,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 257949696.0 + "value": 256901120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.455131122317 + "value": 50.479649516642766 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -1798,13 +1934,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.36384009889296 + "value": 79.6480259615895 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.005532513713673 + "value": 10.733815998729836 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -1816,7 +1952,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.26467935111755 + "value": 76.22664145761797 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -1835,21 +1971,38 @@ "time" ], "times": { - "compilation": 12737.195, - "data": 64895.124, - "framework": 261758.47000000003, - "kernel_overhead": 59990.023, - "profiling_overhead": 54818.368, - "profiling_runs": 82054.955, + "compilation": 14353.455, + "data": 69155.819, + "framework": 268261.904, + "kernel_overhead": 59251.505, + "profiling_overhead": 58471.272, + "profiling_runs": 81383.308, "runtimes": [ - 2172.736 + 2127.392 ], - "search_algorithm": 19.429, - "validation": 12.901 + "search_algorithm": 27.037, + "validation": 15.405 }, - "timestamp": "2026-01-27 09:24:56 UTC" + "timestamp": "2026-03-02 14:27:10 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -1866,61 +2019,61 @@ { "name": "time", "unit": "", - "value": 2167.616 + "value": 2164.768 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.781884522791344 + "value": 23.249493265068804 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103588.0 + "value": 2928.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838800.0 + "value": 1839652.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.763029892442016 + "value": 2.948621098973287 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2139631.0 + "value": 39384.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099821.0 + "value": 2103158.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.461939577123957 + "value": 19.908356288799332 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9295705445359647 + "value": 0.9326777340553464 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -1950,13 +2103,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.61398937577567 + "value": 94.08927620238273 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93817998675392 + "value": 99.92497195202075 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -1968,7 +2121,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6459228160.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -1992,7 +2145,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -2010,13 +2163,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 257949696.0 + "value": 256901120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.45997383498428 + "value": 50.48051672525135 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -2028,13 +2181,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.37242117501978 + "value": 79.64825850632965 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.006722467629695 + "value": 10.733847337767083 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -2046,7 +2199,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.27297974201687 + "value": 76.2269197951188 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -2065,21 +2218,38 @@ "time" ], "times": { - "compilation": 13014.506, - "data": 65424.243, - "framework": 260001.507, - "kernel_overhead": 58639.622, - "profiling_overhead": 55330.292, - "profiling_runs": 80607.35, + "compilation": 14089.717, + "data": 68407.585, + "framework": 268120.423, + "kernel_overhead": 59431.985, + "profiling_overhead": 58664.217, + "profiling_runs": 81616.636, "runtimes": [ - 2167.616 + 2164.768 ], - "search_algorithm": 26.244, - "validation": 14.934 + "search_algorithm": 21.14, + "validation": 14.749 }, - "timestamp": "2026-01-27 09:24:57 UTC" + "timestamp": "2026-03-02 14:27:10 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -2096,61 +2266,61 @@ { "name": "time", "unit": "", - "value": 2162.432 + "value": 2097.536 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.516484815994446 + "value": 22.793559107453714 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2099936.0 + "value": 4904.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838640.0 + "value": 1838712.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.728774893376334 + "value": 2.908916147115297 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2134255.0 + "value": 40608.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099949.0 + "value": 2100206.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.358561709573273 + "value": 19.82995532850941 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9254018980227893 + "value": 0.9287484315607809 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -2180,13 +2350,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.12230270202133 + "value": 89.89653215951564 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97350521197525 + "value": 99.89606359192587 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -2198,7 +2368,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6459228160.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -2222,7 +2392,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -2240,13 +2410,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 257949696.0 + "value": 256901120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.2173490313751 + "value": 50.28373780185112 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -2258,13 +2428,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 78.98855648188172 + "value": 79.33565812656536 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.953491230885941 + "value": 10.69171955221291 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -2276,7 +2446,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.90417004020563 + "value": 75.927812963877 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -2295,21 +2465,38 @@ "time" ], "times": { - "compilation": 13959.155, - "data": 65241.577, - "framework": 263063.631, - "kernel_overhead": 60492.696, - "profiling_overhead": 54907.83, - "profiling_runs": 82421.528, + "compilation": 14579.325, + "data": 69786.15, + "framework": 270141.891, + "kernel_overhead": 59620.219, + "profiling_overhead": 58990.346, + "profiling_runs": 81745.176, "runtimes": [ - 2162.432 + 2097.536 ], - "search_algorithm": 34.132, - "validation": 17.361 + "search_algorithm": 22.796, + "validation": 13.722 }, - "timestamp": "2026-01-27 09:24:57 UTC" + "timestamp": "2026-03-02 14:27:10 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 39 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -2326,61 +2513,61 @@ { "name": "time", "unit": "", - "value": 1879.456 + "value": 1824.16 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 55.97537091585948 + "value": 26.938519246308672 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098312.0 + "value": 4656.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1866428.0 + "value": 1872920.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.527628811216685 + "value": 3.3410455860439634 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2113782.0 + "value": 39998.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099419.0 + "value": 2104082.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.957712805363945 + "value": 11.290663649221761 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5271242376447481 + "value": 0.5288503283203805 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -2410,13 +2597,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.18329462963054 + "value": 97.07624468088186 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92771171327496 + "value": 99.90168753684452 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -2428,7 +2615,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5385486336.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -2452,7 +2639,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -2470,13 +2657,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 198443008.0 + "value": 197394432.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.84885140371172 + "value": 45.83409883610383 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -2488,13 +2675,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.02761597920778 + "value": 90.34594402294164 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.593819529727132 + "value": 6.26422072815318 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -2506,7 +2693,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.55497740910504 + "value": 66.4374039509074 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -2525,21 +2712,38 @@ "time" ], "times": { - "compilation": 13554.856, - "data": 65502.107, - "framework": 212599.62900000002, - "kernel_overhead": 35658.869, - "profiling_overhead": 55076.96, - "profiling_runs": 56361.693, + "compilation": 56995.722, + "data": 62746.945, + "framework": 205096.807, + "kernel_overhead": 34237.658, + "profiling_overhead": 53080.389, + "profiling_runs": 55031.815, "runtimes": [ - 1879.456 + 1824.16 ], - "search_algorithm": 20.045, - "validation": 10.621 + "search_algorithm": 25.066, + "validation": 14.408 }, - "timestamp": "2026-01-27 09:24:57 UTC" + "timestamp": "2026-03-02 14:27:10 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 39 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -2556,61 +2760,61 @@ { "name": "time", "unit": "", - "value": 1958.304 + "value": 1852.864 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 55.13961868919974 + "value": 26.710777654181406 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104024.0 + "value": 2408.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871816.0 + "value": 1872492.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.564895183710831 + "value": 3.3234204776513065 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2121666.0 + "value": 35972.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105224.0 + "value": 2103006.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.953228628462792 + "value": 11.290177831907842 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5270744387555845 + "value": 0.5288589205992874 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -2640,13 +2844,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.71476902332331 + "value": 95.9153412671905 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92725777330782 + "value": 99.90543601809861 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -2658,7 +2862,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5385486336.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -2682,7 +2886,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -2700,13 +2904,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 198443008.0 + "value": 197394432.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.84422382894784 + "value": 45.83302684144656 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -2718,13 +2922,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.01951975074405 + "value": 90.34402202024393 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.593226544243949 + "value": 6.264087464294256 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -2736,7 +2940,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.54905379093708 + "value": 66.43604341357506 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -2755,21 +2959,38 @@ "time" ], "times": { - "compilation": 13345.404, - "data": 64587.961, - "framework": 210240.054, - "kernel_overhead": 34936.139, - "profiling_overhead": 54571.254, - "profiling_runs": 56144.7, + "compilation": 14491.392, + "data": 70302.524, + "framework": 219037.966, + "kernel_overhead": 34383.432, + "profiling_overhead": 59097.552, + "profiling_runs": 55254.458, "runtimes": [ - 1958.304 + 1852.864 ], - "search_algorithm": 25.103, - "validation": 14.787 + "search_algorithm": 22.711, + "validation": 16.437 }, - "timestamp": "2026-01-27 09:24:57 UTC" + "timestamp": "2026-03-02 14:27:10 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 39 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -2786,61 +3007,61 @@ { "name": "time", "unit": "", - "value": 1852.928 + "value": 1853.664 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 55.004300714924035 + "value": 26.093433062880322 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2100648.0 + "value": 4344.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838540.0 + "value": 1839848.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.498999091802597 + "value": 3.327966971941587 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2132959.0 + "value": 38830.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100783.0 + "value": 2102725.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.967754333792875 + "value": 11.290329179956203 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5275382546267193 + "value": 0.5289277258012499 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -2870,13 +3091,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.36563380035646 + "value": 96.63553138997261 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90405419459563 + "value": 99.92688900760733 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -2888,7 +3109,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5385486336.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -2912,7 +3133,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -2930,13 +3151,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 198443008.0 + "value": 197394432.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.895244480476805 + "value": 45.828736498274765 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -2948,13 +3169,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.119661491339 + "value": 90.33637769230242 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.600561144385179 + "value": 6.263557437649875 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -2966,7 +3187,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.62302852685391 + "value": 66.43036010307041 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -2985,21 +3206,38 @@ "time" ], "times": { - "compilation": 13080.658, - "data": 62659.078, - "framework": 208103.469, - "kernel_overhead": 35861.114, - "profiling_overhead": 52838.755, - "profiling_runs": 56744.522, + "compilation": 14138.907, + "data": 69745.293, + "framework": 217301.769, + "kernel_overhead": 34265.859, + "profiling_overhead": 58034.545, + "profiling_runs": 55256.072, "runtimes": [ - 1852.928 + 1853.664 ], - "search_algorithm": 23.384, - "validation": 14.469 + "search_algorithm": 23.834, + "validation": 16.918 }, - "timestamp": "2026-01-27 09:24:57 UTC" + "timestamp": "2026-03-02 14:27:10 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 39 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -3016,61 +3254,61 @@ { "name": "time", "unit": "", - "value": 1879.552 + "value": 1845.504 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 55.59903369628129 + "value": 25.978005713152868 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098452.0 + "value": 5592.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836216.0 + "value": 1839420.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.557774548162584 + "value": 3.3128921378104135 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2130329.0 + "value": 37236.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098994.0 + "value": 2100914.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.96720317008775 + "value": 11.290097296949842 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5275674719533896 + "value": 0.5288867478425473 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -3100,13 +3338,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.8812382577246 + "value": 96.3149146889141 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90073830143925 + "value": 99.9047134696432 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -3118,7 +3356,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5385486336.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -3142,7 +3380,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -3160,13 +3398,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 198443008.0 + "value": 197394432.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.90081261364647 + "value": 45.83816370232567 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -3178,13 +3416,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.12764411046199 + "value": 90.34942913468103 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.601145808871728 + "value": 6.2644623716429235 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -3196,7 +3434,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.62898727481817 + "value": 66.44001962895105 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -3215,21 +3453,38 @@ "time" ], "times": { - "compilation": 13623.498, - "data": 64967.993, - "framework": 212079.33000000002, - "kernel_overhead": 35536.83, - "profiling_overhead": 55220.838, - "profiling_runs": 56353.669, + "compilation": 14566.317, + "data": 67501.197, + "framework": 213170.279, + "kernel_overhead": 34593.433, + "profiling_overhead": 55577.162, + "profiling_runs": 55498.487, "runtimes": [ - 1879.552 + 1845.504 ], - "search_algorithm": 18.887, - "validation": 13.817 + "search_algorithm": 21.079, + "validation": 16.365 }, - "timestamp": "2026-01-27 09:24:57 UTC" + "timestamp": "2026-03-02 14:27:11 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 39 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -3246,61 +3501,61 @@ { "name": "time", "unit": "", - "value": 1845.92 + "value": 1860.0 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 56.435641860891906 + "value": 26.281960486877697 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098096.0 + "value": 1564.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838132.0 + "value": 1839132.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.61764650653196 + "value": 3.337799850239394 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2128610.0 + "value": 31017.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099964.0 + "value": 2099038.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.074206203458965 + "value": 11.381127374713866 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5323049958608608 + "value": 0.5330927590294268 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -3330,13 +3585,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.45273082318771 + "value": 94.51491572048003 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91363560727518 + "value": 99.87689745914359 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -3348,7 +3603,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5385486336.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -3372,7 +3627,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -3390,13 +3645,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 198443008.0 + "value": 197394432.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 46.30755804990525 + "value": 46.21419104561163 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -3408,13 +3663,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.92524633041373 + "value": 91.09330237746549 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.659563940215849 + "value": 6.316039520312549 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -3426,7 +3681,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 67.21862133958506 + "value": 66.98702750233574 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -3445,21 +3700,38 @@ "time" ], "times": { - "compilation": 13469.896, - "data": 65682.979, - "framework": 211507.90999999997, - "kernel_overhead": 34893.947, - "profiling_overhead": 55309.538, - "profiling_runs": 55621.446, + "compilation": 14879.035, + "data": 68881.508, + "framework": 216159.36200000002, + "kernel_overhead": 34799.521, + "profiling_overhead": 56901.539, + "profiling_runs": 55576.794, "runtimes": [ - 1845.92 + 1860.0 ], - "search_algorithm": 17.68, - "validation": 13.859 + "search_algorithm": 25.458, + "validation": 15.284 }, - "timestamp": "2026-01-27 09:24:57 UTC" + "timestamp": "2026-03-02 14:27:11 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -3476,61 +3748,61 @@ { "name": "time", "unit": "", - "value": 1885.824 + "value": 1774.4 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 56.8551029104505 + "value": 27.69779785137747 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103616.0 + "value": 284.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1867108.0 + "value": 1870864.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.632503515432086 + "value": 3.4109896500830255 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2121424.0 + "value": 31284.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100691.0 + "value": 2097371.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.494533659595679 + "value": 5.796891131757365 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2704471456634533 + "value": 0.2715628421676894 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -3560,13 +3832,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.64154138156974 + "value": 98.42110015383683 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96969595219547 + "value": 99.92850632480865 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -3578,7 +3850,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4848615424.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -3602,7 +3874,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -3620,13 +3892,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 174850048.0 + "value": 173801472.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.60732786594795 + "value": 42.618593265813004 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -3638,13 +3910,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.34060866198398 + "value": 92.75976749345133 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.742319589328452 + "value": 3.3969641416058836 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -3656,7 +3928,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.14880698121129 + "value": 60.0594926242331 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -3675,21 +3947,38 @@ "time" ], "times": { - "compilation": 13056.195, - "data": 64450.364, - "framework": 198242.983, - "kernel_overhead": 29379.549, - "profiling_overhead": 54400.019, - "profiling_runs": 50013.051, + "compilation": 49228.741, + "data": 70579.426, + "framework": 209115.252, + "kernel_overhead": 29145.654, + "profiling_overhead": 59171.295, + "profiling_runs": 50218.877, "runtimes": [ - 1885.824 + 1774.4 ], - "search_algorithm": 24.614, - "validation": 14.52 + "search_algorithm": 23.047, + "validation": 19.782 }, - "timestamp": "2026-01-27 09:24:58 UTC" + "timestamp": "2026-03-02 14:27:11 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -3706,61 +3995,61 @@ { "name": "time", "unit": "", - "value": 1794.976 + "value": 1785.664 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 58.10244422572178 + "value": 27.526361521362368 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098060.0 + "value": 188.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868896.0 + "value": 1870160.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.730148509681079 + "value": 3.416315067902632 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2115904.0 + "value": 30981.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100486.0 + "value": 2098988.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.4915339926948725 + "value": 5.796986553928564 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2702623643162184 + "value": 0.2715609517097533 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -3790,13 +4079,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.68083984229285 + "value": 98.32328218533814 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89533842234344 + "value": 99.92587765038782 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -3808,7 +4097,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4848615424.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -3832,7 +4121,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -3850,13 +4139,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 174850048.0 + "value": 173801472.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.61142844083994 + "value": 42.62084937854922 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -3868,13 +4157,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.34620468132798 + "value": 92.76156189947932 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.7425463811280384 + "value": 3.3970298547172604 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -3886,7 +4175,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.15251231803256 + "value": 60.06071803833911 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -3905,21 +4194,38 @@ "time" ], "times": { - "compilation": 13338.646, - "data": 64127.213, - "framework": 197177.687, - "kernel_overhead": 29192.916, - "profiling_overhead": 54139.402, - "profiling_runs": 49718.156, + "compilation": 14236.209, + "data": 68327.844, + "framework": 203635.83599999998, + "kernel_overhead": 28846.493, + "profiling_overhead": 56972.346, + "profiling_runs": 49489.153, "runtimes": [ - 1794.976 + 1785.664 ], - "search_algorithm": 21.745, - "validation": 14.304 + "search_algorithm": 21.49, + "validation": 15.179 }, - "timestamp": "2026-01-27 09:24:58 UTC" + "timestamp": "2026-03-02 14:27:11 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -3936,61 +4242,61 @@ { "name": "time", "unit": "", - "value": 1812.672 + "value": 1828.48 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 57.379347278225815 + "value": 26.644876897445393 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097296.0 + "value": 3600.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837036.0 + "value": 1838776.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.719402442311702 + "value": 3.3868480767941698 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2128405.0 + "value": 34631.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098991.0 + "value": 2099304.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.500687384174538 + "value": 5.796852995815277 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2706739093083806 + "value": 0.27156581294048227 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -4020,13 +4326,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.86159082681552 + "value": 98.51345226086795 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91431268420496 + "value": 99.92344048005799 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -4038,7 +4344,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4848615424.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -4062,7 +4368,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -4080,13 +4386,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 174850048.0 + "value": 173801472.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.66968122115258 + "value": 42.623141924505845 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -4098,13 +4404,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.46926213925651 + "value": 92.76548496030877 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.747533573026509 + "value": 3.3971735214956826 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -4116,7 +4422,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.23261752459488 + "value": 60.063196260482734 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -4135,21 +4441,38 @@ "time" ], "times": { - "compilation": 12951.859, - "data": 64835.14, - "framework": 198413.314, - "kernel_overhead": 29155.627, - "profiling_overhead": 54793.885, - "profiling_runs": 49628.662, + "compilation": 15674.894, + "data": 66963.522, + "framework": 200368.312, + "kernel_overhead": 28805.72, + "profiling_overhead": 54937.765, + "profiling_runs": 49661.305, "runtimes": [ - 1812.672 + 1828.48 ], - "search_algorithm": 22.746, - "validation": 16.995 + "search_algorithm": 21.473, + "validation": 12.864 }, - "timestamp": "2026-01-27 09:24:58 UTC" + "timestamp": "2026-03-02 14:27:11 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -4166,61 +4489,61 @@ { "name": "time", "unit": "", - "value": 1874.208 + "value": 1772.32 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 56.40901878416553 + "value": 27.19851269420235 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098824.0 + "value": 1152.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837280.0 + "value": 1836336.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.671308175583225 + "value": 3.4170265286554993 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2130027.0 + "value": 31499.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099518.0 + "value": 2099111.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.500956518190572 + "value": 5.79717284051924 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2707194985344289 + "value": 0.2715748156792023 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -4250,13 +4573,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.70923893972369 + "value": 98.25363505043956 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9178648982362 + "value": 99.9240947506144 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -4268,7 +4591,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4848615424.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -4292,7 +4615,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -4310,13 +4633,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 174850048.0 + "value": 173801472.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.67341114464659 + "value": 42.62431212870764 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -4328,13 +4651,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.48154864717917 + "value": 92.76795283111407 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.7480315125565773 + "value": 3.397263897623806 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -4346,7 +4669,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.24067273135035 + "value": 60.064856001683594 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -4365,21 +4688,38 @@ "time" ], "times": { - "compilation": 13268.024, - "data": 63147.229, - "framework": 195912.894, - "kernel_overhead": 29349.356, - "profiling_overhead": 53206.932, - "profiling_runs": 50209.377, + "compilation": 15406.292, + "data": 69711.22, + "framework": 205942.089, + "kernel_overhead": 28741.815, + "profiling_overhead": 58124.029, + "profiling_runs": 49365.025, "runtimes": [ - 1874.208 + 1772.32 ], - "search_algorithm": 24.411, - "validation": 14.448 + "search_algorithm": 21.53, + "validation": 15.203 }, - "timestamp": "2026-01-27 09:24:58 UTC" + "timestamp": "2026-03-02 14:27:11 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -4396,61 +4736,61 @@ { "name": "time", "unit": "", - "value": 1860.032 + "value": 1858.944 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 57.329910215854106 + "value": 27.06206085143158 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2102560.0 + "value": 4452.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841004.0 + "value": 1839336.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.774905878019141 + "value": 3.436472247431214 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2133760.0 + "value": 35720.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100403.0 + "value": 2100275.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.594743782113634 + "value": 5.875434244928064 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2746429953966901 + "value": 0.2752114506597583 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -4480,13 +4820,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.58485780846348 + "value": 97.37759667830144 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93248890520972 + "value": 99.89319297635204 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -4498,7 +4838,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4848615424.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -4522,7 +4862,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -4540,13 +4880,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 174850048.0 + "value": 173801472.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.28453605973394 + "value": 43.20920088570921 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -4558,13 +4898,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.80813999771843 + "value": 94.0392823837715 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.8017947362356592 + "value": 3.4438213763588204 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -4576,7 +4916,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 61.10478840161182 + "value": 60.8880090861337 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -4595,21 +4935,38 @@ "time" ], "times": { - "compilation": 13003.798, - "data": 62419.829, - "framework": 194328.153, - "kernel_overhead": 29340.34, - "profiling_overhead": 52824.851, - "profiling_runs": 49743.133, + "compilation": 14835.442, + "data": 67834.653, + "framework": 202116.83899999998, + "kernel_overhead": 28623.593, + "profiling_overhead": 55977.339, + "profiling_runs": 49681.254, "runtimes": [ - 1860.032 + 1858.944 ], - "search_algorithm": 19.86, - "validation": 13.132 + "search_algorithm": 20.885, + "validation": 18.401 }, - "timestamp": "2026-01-27 09:24:58 UTC" + "timestamp": "2026-03-02 14:27:11 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -4626,61 +4983,61 @@ { "name": "time", "unit": "", - "value": 1731.04 + "value": 1709.248 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 60.035143824191174 + "value": 28.75953236353669 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098040.0 + "value": 8312.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1863780.0 + "value": 1871908.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.963992687996251 + "value": 3.5429795173434733 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2117071.0 + "value": 38501.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099909.0 + "value": 2100652.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.7311426382365545 + "value": 2.9954603282463763 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.13980918685620702 + "value": 0.14030932222307607 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -4710,13 +5067,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.00429718077244 + "value": 98.37818491636557 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91210903944672 + "value": 99.91166902723094 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -4728,7 +5085,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4580179968.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -4752,13 +5109,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 162529280.0 + "value": 160432128.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -4770,13 +5127,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 164495360.0 + "value": 163381248.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.57796486606208 + "value": 39.53194391455207 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -4788,13 +5145,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.52703118580123 + "value": 95.86917948310973 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.3088808807115044 + "value": 1.942661595971218 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -4806,7 +5163,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.5395641892962 + "value": 58.35130026954217 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -4825,21 +5182,38 @@ "time" ], "times": { - "compilation": 13477.646, - "data": 65343.543, - "framework": 204996.081, - "kernel_overhead": 32143.586, - "profiling_overhead": 55115.64, - "profiling_runs": 52393.312, + "compilation": 58583.683, + "data": 68882.676, + "framework": 211853.225, + "kernel_overhead": 32500.187, + "profiling_overhead": 57705.988, + "profiling_runs": 52764.374, "runtimes": [ - 1731.04 + 1709.248 ], - "search_algorithm": 23.539, - "validation": 15.464 + "search_algorithm": 31.204, + "validation": 14.814 }, - "timestamp": "2026-01-27 09:24:58 UTC" + "timestamp": "2026-03-02 14:27:12 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -4856,61 +5230,61 @@ { "name": "time", "unit": "", - "value": 1793.632 + "value": 1739.392 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 59.07460169233115 + "value": 28.43832726671079 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2100992.0 + "value": 216.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1866704.0 + "value": 1869852.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.90357078888712 + "value": 3.525510714045206 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2118190.0 + "value": 28327.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099720.0 + "value": 2098984.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.7306526933032873 + "value": 2.9954521134411047 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.13981279785456535 + "value": 0.1402962182017217 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -4940,13 +5314,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.54815805545952 + "value": 98.31904876787739 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95209544260113 + "value": 99.90870912636719 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -4958,7 +5332,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4580179968.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -4982,13 +5356,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 162529280.0 + "value": 160432128.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -5000,13 +5374,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 164495360.0 + "value": 163381248.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.56175134643332 + "value": 39.52910689286324 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -5018,13 +5392,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.49128134439913 + "value": 95.86306585601984 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.3080168098377722 + "value": 1.9425377114379023 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -5036,7 +5410,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.51772191696223 + "value": 58.34764631512371 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -5055,21 +5429,38 @@ "time" ], "times": { - "compilation": 13204.42, - "data": 61083.556, - "framework": 197219.311, - "kernel_overhead": 32146.712, - "profiling_overhead": 51453.804, - "profiling_runs": 52535.239, + "compilation": 14813.837, + "data": 69903.773, + "framework": 213348.207, + "kernel_overhead": 32181.653, + "profiling_overhead": 58491.338, + "profiling_runs": 52771.443, "runtimes": [ - 1793.632 + 1739.392 ], - "search_algorithm": 25.177, - "validation": 13.911 + "search_algorithm": 27.055, + "validation": 16.977 }, - "timestamp": "2026-01-27 09:24:58 UTC" + "timestamp": "2026-03-02 14:27:12 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -5086,61 +5477,61 @@ { "name": "time", "unit": "", - "value": 1751.904 + "value": 1746.08 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 58.412663206437166 + "value": 27.864553098030107 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2100768.0 + "value": 3584.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839068.0 + "value": 1839596.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.901710433264528 + "value": 3.5281195577528206 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2131555.0 + "value": 35076.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099819.0 + "value": 2103944.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.736208333179525 + "value": 2.9954081735550537 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.14003613497532386 + "value": 0.14030571757217217 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -5170,13 +5561,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.23922989484699 + "value": 98.4349709774984 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93148693919164 + "value": 99.90951398763967 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -5188,7 +5579,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4580179968.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -5212,13 +5603,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 162529280.0 + "value": 160432128.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -5230,13 +5621,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 164495360.0 + "value": 163381248.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.63581394364581 + "value": 39.53181813066613 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -5248,13 +5639,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.66354349821613 + "value": 95.86878436934366 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.312180372637548 + "value": 1.9426535895155088 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -5266,7 +5657,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.62322121652317 + "value": 58.35106263850053 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -5285,21 +5676,38 @@ "time" ], "times": { - "compilation": 12953.606, - "data": 62892.414, - "framework": 199979.89399999997, - "kernel_overhead": 32148.156, - "profiling_overhead": 52283.25, - "profiling_runs": 52656.074, + "compilation": 14803.571, + "data": 69112.638, + "framework": 211499.565, + "kernel_overhead": 31866.805, + "profiling_overhead": 58046.57, + "profiling_runs": 52473.552, "runtimes": [ - 1751.904 + 1746.08 ], - "search_algorithm": 26.307, - "validation": 11.842 + "search_algorithm": 24.323, + "validation": 16.125 }, - "timestamp": "2026-01-27 09:24:59 UTC" + "timestamp": "2026-03-02 14:27:12 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -5316,61 +5724,61 @@ { "name": "time", "unit": "", - "value": 1741.92 + "value": 1688.064 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 58.78675066819397 + "value": 28.35324899622107 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2101376.0 + "value": 5352.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840064.0 + "value": 1838580.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.939718160306523 + "value": 3.5534382131647413 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2132248.0 + "value": 34400.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100488.0 + "value": 2099880.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.737182801933645 + "value": 2.9954200247897678 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.14005667690686743 + "value": 0.14028764904706462 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -5400,13 +5808,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.18895211253626 + "value": 98.32169793292385 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95041333417525 + "value": 99.89927778061926 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -5418,7 +5826,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4580179968.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -5442,13 +5850,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 162529280.0 + "value": 160432128.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -5460,13 +5868,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 164495360.0 + "value": 163381248.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.633757121104715 + "value": 39.53140947233898 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -5478,13 +5886,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.65945910473754 + "value": 95.86626037454936 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.312081653166264 + "value": 1.9426024441131828 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -5496,7 +5904,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.620782419172954 + "value": 58.34959067853044 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -5515,21 +5923,38 @@ "time" ], "times": { - "compilation": 13432.248, - "data": 64535.628, - "framework": 202649.111, - "kernel_overhead": 31862.656, - "profiling_overhead": 54143.116, - "profiling_runs": 52107.711, + "compilation": 14630.477, + "data": 69062.602, + "framework": 212199.229, + "kernel_overhead": 32559.446, + "profiling_overhead": 57695.299, + "profiling_runs": 52881.882, "runtimes": [ - 1741.92 + 1688.064 ], - "search_algorithm": 21.493, - "validation": 11.876 + "search_algorithm": 28.889, + "validation": 17.432 }, - "timestamp": "2026-01-27 09:24:59 UTC" + "timestamp": "2026-03-02 14:27:12 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -5546,61 +5971,61 @@ { "name": "time", "unit": "", - "value": 1790.72 + "value": 1732.192 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 58.77111421969292 + "value": 28.028384471249563 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2101156.0 + "value": 1024.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838032.0 + "value": 1837064.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.927101465448163 + "value": 3.5403768234896105 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2129914.0 + "value": 29461.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100843.0 + "value": 2099906.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.7379921650233197 + "value": 3.0033945207103057 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.14007465404535668 + "value": 0.14069084149335634 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -5630,13 +6055,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.7732647224841 + "value": 98.07198374280273 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91747366618162 + "value": 99.9085561052818 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -5648,7 +6073,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4580179968.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -5672,13 +6097,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 162529280.0 + "value": 160432128.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -5690,13 +6115,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 164495360.0 + "value": 163381248.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.650539434751344 + "value": 39.63881496995679 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -5708,13 +6133,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.70327756795102 + "value": 96.1328554198977 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.3131407419988164 + "value": 1.9480046386356222 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -5726,7 +6151,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.647622556683174 + "value": 58.51184107232393 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -5745,21 +6170,38 @@ "time" ], "times": { - "compilation": 12985.032, - "data": 64956.891, - "framework": 204811.138, - "kernel_overhead": 32517.953, - "profiling_overhead": 54602.995, - "profiling_runs": 52733.299, + "compilation": 14810.649, + "data": 66773.137, + "framework": 208322.876, + "kernel_overhead": 32740.908, + "profiling_overhead": 55705.094, + "profiling_runs": 53103.737, "runtimes": [ - 1790.72 + 1732.192 ], - "search_algorithm": 18.233, - "validation": 14.223 + "search_algorithm": 23.927, + "validation": 17.145 }, - "timestamp": "2026-01-27 09:24:59 UTC" + "timestamp": "2026-03-02 14:27:12 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -5776,61 +6218,61 @@ { "name": "time", "unit": "", - "value": 1754.944 + "value": 1723.872 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 59.67388058263651 + "value": 28.588189045591278 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2100600.0 + "value": 4788.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1863808.0 + "value": 1868548.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.9769104000135185 + "value": 3.527843606595351 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2123628.0 + "value": 34052.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103685.0 + "value": 2100264.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.243490519241941 + "value": 1.4943314148943247 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07010829472185184 + "value": 0.06999991113292532 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -5860,13 +6302,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.39392488448567 + "value": 81.74097974848142 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9594429676896 + "value": 99.9287721139449 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -5878,7 +6320,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4445962240.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -5902,13 +6344,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 83361792.0 + "value": 82837504.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -5920,13 +6362,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 151257088.0 + "value": 150192128.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.167954276302055 + "value": 37.93338271560582 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -5938,13 +6380,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.7600291528657 + "value": 95.64133530148415 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5313188255646248 + "value": 1.1558218011287757 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -5956,7 +6398,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 53.95971070078721 + "value": 53.51339158662879 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -5975,21 +6417,38 @@ "time" ], "times": { - "compilation": 13505.473, - "data": 63983.618, - "framework": 182594.415, - "kernel_overhead": 22204.429, - "profiling_overhead": 53752.883, - "profiling_runs": 42653.485, + "compilation": 69379.553, + "data": 66020.175, + "framework": 184920.453, + "kernel_overhead": 22170.916, + "profiling_overhead": 54140.152, + "profiling_runs": 42589.21, "runtimes": [ - 1754.944 + 1723.872 ], - "search_algorithm": 26.005, - "validation": 17.642 + "search_algorithm": 22.108, + "validation": 13.947 }, - "timestamp": "2026-01-27 09:24:59 UTC" + "timestamp": "2026-03-02 14:27:12 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -6006,61 +6465,61 @@ { "name": "time", "unit": "", - "value": 1722.816 + "value": 1750.208 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 60.675194965111515 + "value": 28.220868889489537 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103752.0 + "value": 1792.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869784.0 + "value": 1869220.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.988844269227535 + "value": 3.528802546053323 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2124136.0 + "value": 31421.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103583.0 + "value": 2100017.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.24328586246011 + "value": 1.4942586932797675 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06997631238045428 + "value": 0.06999486316995389 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -6090,13 +6549,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.09543635078947 + "value": 81.71603627770547 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.81994859349207 + "value": 99.91596080823105 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -6108,7 +6567,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4445962240.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -6132,13 +6591,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 83361792.0 + "value": 82837504.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -6150,13 +6609,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 151257088.0 + "value": 150192128.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.14771877472932 + "value": 37.9340170330616 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -6168,13 +6627,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.71332502470273 + "value": 95.6467005621498 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5305719699995188 + "value": 1.1558866400943395 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -6186,7 +6645,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 53.933393444344205 + "value": 53.51639356578415 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -6205,21 +6664,38 @@ "time" ], "times": { - "compilation": 13400.194, - "data": 65480.215, - "framework": 184422.36899999998, - "kernel_overhead": 22144.492, - "profiling_overhead": 54497.786, - "profiling_runs": 42299.876, + "compilation": 15378.128, + "data": 69877.93, + "framework": 193548.707, + "kernel_overhead": 22166.081, + "profiling_overhead": 58882.144, + "profiling_runs": 42622.552, "runtimes": [ - 1722.816 + 1750.208 ], - "search_algorithm": 22.003, - "validation": 18.784 + "search_algorithm": 25.207, + "validation": 16.899 }, - "timestamp": "2026-01-27 09:24:59 UTC" + "timestamp": "2026-03-02 14:27:13 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -6236,61 +6712,61 @@ { "name": "time", "unit": "", - "value": 1704.864 + "value": 1723.392 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 60.017881139718774 + "value": 27.79847864699977 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104368.0 + "value": 6076.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840180.0 + "value": 1836932.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.029281433075274 + "value": 3.5455738676476742 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2134800.0 + "value": 37528.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104536.0 + "value": 2103854.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.247705373827939 + "value": 1.4941723309173005 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07018646134689693 + "value": 0.06999297336165862 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -6320,13 +6796,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.53105567311181 + "value": 81.74898306986294 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90219382669862 + "value": 99.91735469167092 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -6338,7 +6814,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4445962240.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -6362,13 +6838,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 83361792.0 + "value": 82837504.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -6380,13 +6856,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 151257088.0 + "value": 150192128.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.230691048060685 + "value": 37.93148206567682 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -6398,13 +6874,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.921732601652 + "value": 95.64278390343607 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.533904659523488 + "value": 1.1558393074267785 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -6416,7 +6892,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 54.05082879455782 + "value": 53.51420211068998 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -6435,21 +6911,38 @@ "time" ], "times": { - "compilation": 13571.813, - "data": 64392.441, - "framework": 182234.56999999998, - "kernel_overhead": 22074.925, - "profiling_overhead": 53486.564, - "profiling_runs": 42280.64, + "compilation": 14703.937, + "data": 66049.147, + "framework": 185282.677, + "kernel_overhead": 22134.187, + "profiling_overhead": 54310.174, + "profiling_runs": 42789.169, "runtimes": [ - 1704.864 + 1723.392 ], - "search_algorithm": 24.73, - "validation": 14.987 + "search_algorithm": 22.319, + "validation": 17.682 }, - "timestamp": "2026-01-27 09:24:59 UTC" + "timestamp": "2026-03-02 14:27:13 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -6466,61 +6959,61 @@ { "name": "time", "unit": "", - "value": 1730.848 + "value": 1744.864 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 59.626270571151984 + "value": 28.064318022303674 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2102144.0 + "value": 3080.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839868.0 + "value": 1839448.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.981288753589351 + "value": 3.538117200073247 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2131815.0 + "value": 32620.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102732.0 + "value": 2103065.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.248313651121233 + "value": 1.4941442482665122 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0701807611505624 + "value": 0.06995562872962902 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -6550,13 +7043,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.50586725639425 + "value": 81.71308209609535 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89632695839047 + "value": 99.87602381554213 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -6568,7 +7061,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4445962240.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -6592,13 +7085,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 83361792.0 + "value": 82837504.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -6610,13 +7103,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 151257088.0 + "value": 150192128.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.22787235707018 + "value": 37.92614995241385 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -6628,13 +7121,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.91957529877905 + "value": 95.631311810383 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5338701616381905 + "value": 1.155700667630361 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -6646,7 +7139,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 54.04961317839785 + "value": 53.50778322699359 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -6665,21 +7158,38 @@ "time" ], "times": { - "compilation": 13335.483, - "data": 65858.765, - "framework": 184918.30299999999, - "kernel_overhead": 22088.422, - "profiling_overhead": 54717.279, - "profiling_runs": 42253.837, + "compilation": 15673.229, + "data": 68222.392, + "framework": 189874.055, + "kernel_overhead": 22318.444, + "profiling_overhead": 56651.226, + "profiling_runs": 42681.993, "runtimes": [ - 1730.848 + 1744.864 ], - "search_algorithm": 19.03, - "validation": 11.66 + "search_algorithm": 22.988, + "validation": 17.411 }, - "timestamp": "2026-01-27 09:24:59 UTC" + "timestamp": "2026-03-02 14:27:13 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -6696,61 +7206,61 @@ { "name": "time", "unit": "", - "value": 1725.632 + "value": 1688.8 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 59.60361721079741 + "value": 28.13852015732547 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2100492.0 + "value": 256.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839412.0 + "value": 1831224.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.971019940782175 + "value": 3.5396800085613593 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2129043.0 + "value": 28172.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099494.0 + "value": 2098988.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.2475482172929455 + "value": 1.497302452079513 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0701891875064617 + "value": 0.0701041349915365 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -6780,13 +7290,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.27653118566319 + "value": 81.50782647476196 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.8957761537262 + "value": 99.86654762522814 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -6798,7 +7308,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4445962240.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -6822,13 +7332,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 83361792.0 + "value": 82837504.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -6840,13 +7350,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 151257088.0 + "value": 150192128.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.23608453242712 + "value": 38.015611923075845 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -6858,13 +7368,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.93162096732067 + "value": 95.84341762532802 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5340627864647227 + "value": 1.158263958118588 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -6876,7 +7386,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 54.05640077856311 + "value": 53.626461008908436 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -6895,21 +7405,38 @@ "time" ], "times": { - "compilation": 12820.868, - "data": 65937.843, - "framework": 185535.539, - "kernel_overhead": 22051.137, - "profiling_overhead": 55328.782, - "profiling_runs": 42217.777, + "compilation": 14510.651, + "data": 68954.83, + "framework": 190739.36500000002, + "kernel_overhead": 22152.1, + "profiling_overhead": 57275.768, + "profiling_runs": 42356.667, "runtimes": [ - 1725.632 + 1688.8 ], - "search_algorithm": 22.698, - "validation": 15.01 + "search_algorithm": 22.55, + "validation": 14.589 }, - "timestamp": "2026-01-27 09:25:0 UTC" + "timestamp": "2026-03-02 14:27:13 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 19 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -6926,61 +7453,61 @@ { "name": "time", "unit": "", - "value": 5245.76 + "value": 5040.704 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.658575546556662 + "value": 9.572542672160095 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2112192.0 + "value": 4272.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1877096.0 + "value": 1869312.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.3279177803376285 + "value": 1.2118451600012847 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2190544.0 + "value": 90559.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103788.0 + "value": 2100420.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.09877989895043 + "value": 7.954638179949114 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3680333338524776 + "value": 0.3727980307963393 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -7010,13 +7537,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.78385594024182 + "value": 97.57303958386943 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96959227253545 + "value": 99.96974007174572 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -7028,7 +7555,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6996099072.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -7040,7 +7567,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 7683964928.0 + "value": 8212447232.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -7052,13 +7579,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 3850371072.0 + "value": 3321888768.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -7070,13 +7597,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 716439552.0 + "value": 715390976.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.90008102248743 + "value": 36.265524109943925 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -7088,13 +7615,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.4150637623831 + "value": 31.821727859973986 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.356385795174219 + "value": 4.288475043629306 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -7106,7 +7633,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.8455066068534 + "value": 84.80660323363401 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -7125,21 +7652,38 @@ "time" ], "times": { - "compilation": 13194.803, - "data": 64855.95, - "framework": 2051583.2650000001, - "kernel_overhead": 947772.671, - "profiling_overhead": 53638.459, - "profiling_runs": 985316.185, + "compilation": 48903.768, + "data": 65025.973, + "framework": 2047210.12, + "kernel_overhead": 945847.631, + "profiling_overhead": 53378.434, + "profiling_runs": 982958.082, "runtimes": [ - 5245.76 + 5040.704 ], - "search_algorithm": 24.132, - "validation": 14.706 + "search_algorithm": 22.451, + "validation": 16.967 }, - "timestamp": "2026-01-27 09:25:1 UTC" + "timestamp": "2026-03-02 14:27:14 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 19 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -7156,61 +7700,61 @@ { "name": "time", "unit": "", - "value": 5257.984 + "value": 5220.64 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.784865036509395 + "value": 9.308095767389576 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097604.0 + "value": 12580.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870056.0 + "value": 1873964.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.3211492610706386 + "value": 1.1941739684220447 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2172143.0 + "value": 103841.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099302.0 + "value": 2101615.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.098311849560526 + "value": 7.954787536510024 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.36803655805849333 + "value": 0.3727704927643376 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -7240,13 +7784,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.12596954402618 + "value": 97.19801961609451 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96958498640417 + "value": 99.96279201284311 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -7258,7 +7802,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6996099072.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -7270,7 +7814,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 7683964928.0 + "value": 8212447232.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -7282,13 +7826,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 3850371072.0 + "value": 3321888768.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -7300,13 +7844,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 716439552.0 + "value": 715390976.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.89999755322732 + "value": 36.26506510243737 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -7318,13 +7862,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.415341267975933 + "value": 31.821588888596924 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.356424277395099 + "value": 4.2884563150648205 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -7336,7 +7880,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.84626949375024 + "value": 84.80625586515987 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -7355,21 +7899,38 @@ "time" ], "times": { - "compilation": 13325.61, - "data": 61449.316, - "framework": 2043767.9100000001, - "kernel_overhead": 946687.976, - "profiling_overhead": 51322.06, - "profiling_runs": 984308.558, + "compilation": 14183.07, + "data": 66699.572, + "framework": 2057459.585, + "kernel_overhead": 948511.36, + "profiling_overhead": 56129.21, + "profiling_runs": 986119.443, "runtimes": [ - 5257.984 + 5220.64 ], - "search_algorithm": 20.763, - "validation": 15.178 + "search_algorithm": 26.615, + "validation": 16.853 }, - "timestamp": "2026-01-27 09:25:2 UTC" + "timestamp": "2026-03-02 14:27:15 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 19 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -7386,61 +7947,61 @@ { "name": "time", "unit": "", - "value": 5287.072 + "value": 5302.272 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.505866068028936 + "value": 9.267846384576929 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097340.0 + "value": 10104.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837040.0 + "value": 1840808.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.3294566335991944 + "value": 1.2093389561312229 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2183753.0 + "value": 98475.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098982.0 + "value": 2108945.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.098909629511681 + "value": 7.955036589733708 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3680434062390291 + "value": 0.37278035962019357 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -7470,13 +8031,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.66711717460329 + "value": 97.85859106171799 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96858783279289 + "value": 99.96580386566983 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -7488,7 +8049,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6996099072.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -7500,7 +8061,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 7683964928.0 + "value": 8212447232.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -7512,13 +8073,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 3850371072.0 + "value": 3321888768.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -7530,13 +8091,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 716439552.0 + "value": 715390976.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.900946827675696 + "value": 36.264876633102254 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -7548,13 +8109,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.416239187314186 + "value": 31.821472401037948 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.356548793553334 + "value": 4.288440616546129 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -7566,7 +8127,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.84864446961473 + "value": 84.80592348883233 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -7585,21 +8146,38 @@ "time" ], "times": { - "compilation": 13579.319, - "data": 61943.681, - "framework": 2044157.386, - "kernel_overhead": 946643.329, - "profiling_overhead": 51654.682, - "profiling_runs": 983915.694, + "compilation": 14163.561, + "data": 63883.455, + "framework": 2058825.6609999998, + "kernel_overhead": 951616.288, + "profiling_overhead": 54241.445, + "profiling_runs": 989084.473, "runtimes": [ - 5287.072 + 5302.272 ], - "search_algorithm": 35.031, - "validation": 19.409 + "search_algorithm": 31.338, + "validation": 19.367 }, - "timestamp": "2026-01-27 09:25:3 UTC" + "timestamp": "2026-03-02 14:27:16 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 19 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -7616,61 +8194,61 @@ { "name": "time", "unit": "", - "value": 5290.304 + "value": 5223.776 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.517159755803476 + "value": 9.298409619735693 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098640.0 + "value": 8268.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837968.0 + "value": 1843392.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.330943160150712 + "value": 1.2101768209131532 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2185219.0 + "value": 95355.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100213.0 + "value": 2101162.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.09924838159135 + "value": 7.954313311081076 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3680463963696152 + "value": 0.37278002035000307 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -7700,13 +8278,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.58195526341959 + "value": 97.09506735108882 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9695387778197 + "value": 99.96908466832195 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -7718,7 +8296,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6996099072.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -7730,7 +8308,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 7683964928.0 + "value": 8212447232.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -7742,13 +8320,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 3850371072.0 + "value": 3321888768.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -7760,13 +8338,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 716439552.0 + "value": 715390976.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.90112260578342 + "value": 36.26412953981809 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -7778,13 +8356,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.41619558067693 + "value": 31.820399118462344 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.356542746539184 + "value": 4.288295974949027 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -7796,7 +8374,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.84854961963998 + "value": 84.8030850633873 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -7815,21 +8393,38 @@ "time" ], "times": { - "compilation": 12812.341, - "data": 61448.954, - "framework": 2050034.003, - "kernel_overhead": 949746.544, - "profiling_overhead": 51463.216, - "profiling_runs": 987375.289, + "compilation": 13286.831, + "data": 61610.138, + "framework": 2050654.9640000002, + "kernel_overhead": 949914.734, + "profiling_overhead": 51830.53, + "profiling_runs": 987299.562, "runtimes": [ - 5290.304 + 5223.776 ], - "search_algorithm": 21.259, - "validation": 15.557 + "search_algorithm": 23.685, + "validation": 16.481 }, - "timestamp": "2026-01-27 09:25:4 UTC" + "timestamp": "2026-03-02 14:27:17 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 19 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -7846,61 +8441,61 @@ { "name": "time", "unit": "", - "value": 5349.504 + "value": 5284.096 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.42732514129996 + "value": 9.218901823156388 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104576.0 + "value": 7624.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837536.0 + "value": 1838304.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.3209273226654155 + "value": 1.2024433487824826 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2190312.0 + "value": 92251.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101158.0 + "value": 2101137.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.098158220455524 + "value": 7.958508701782195 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3679858040372277 + "value": 0.3729185929075591 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -7930,13 +8525,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.97785880981384 + "value": 96.87309587291863 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96542014707303 + "value": 99.96016633730657 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -7948,7 +8543,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6996099072.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -7960,7 +8555,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 7683964928.0 + "value": 8212447232.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -7972,13 +8567,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 3850371072.0 + "value": 3321888768.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -7990,13 +8585,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 716439552.0 + "value": 715390976.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.896491937166225 + "value": 36.28072737012859 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -8008,13 +8603,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.412317611074293 + "value": 31.835067668251565 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.356004981223193 + "value": 4.2902727912292145 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -8026,7 +8621,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.83819900824565 + "value": 84.84221122305415 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -8045,21 +8640,38 @@ "time" ], "times": { - "compilation": 13965.338, - "data": 64447.491, - "framework": 2051829.1469999999, - "kernel_overhead": 947604.777, - "profiling_overhead": 54395.159, - "profiling_runs": 985381.72, + "compilation": 13485.982, + "data": 61621.14, + "framework": 2062871.89, + "kernel_overhead": 955930.933, + "profiling_overhead": 51710.912, + "profiling_runs": 993608.905, "runtimes": [ - 5349.504 + 5284.096 ], - "search_algorithm": 21.735, - "validation": 11.549 + "search_algorithm": 38.967, + "validation": 18.585 }, - "timestamp": "2026-01-27 09:25:5 UTC" + "timestamp": "2026-03-02 14:27:18 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 22 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -8076,61 +8688,61 @@ { "name": "time", "unit": "", - "value": 6289.568 + "value": 5631.296 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.589757293428203 + "value": 8.743178697412345 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2120756.0 + "value": 12728.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872380.0 + "value": 1878104.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.0182744976900633 + "value": 1.11394562871978 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2212600.0 + "value": 112034.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2107493.0 + "value": 2108815.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.613005035480322 + "value": 3.624653242658123 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.15935632907756314 + "value": 0.1698607247114455 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -8160,13 +8772,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.76699859556523 + "value": 98.79935624600607 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96838717110225 + "value": 99.96696447010724 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -8178,7 +8790,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -8190,7 +8802,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 9781116928.0 + "value": 10049552384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -8202,13 +8814,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1665138688.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -8220,13 +8832,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 788135936.0 + "value": 786956288.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.387698864082584 + "value": 33.827513401376734 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -8238,13 +8850,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.205413896850246 + "value": 28.999143704873966 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9925840256482115 + "value": 2.01068281547466 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -8256,7 +8868,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.8764338250357 + "value": 85.01546208575631 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -8275,21 +8887,38 @@ "time" ], "times": { - "compilation": 12816.291, - "data": 59625.575, - "framework": 2625395.2309999997, - "kernel_overhead": 1237036.192, - "profiling_overhead": 49599.192, - "profiling_runs": 1279134.272, + "compilation": 51196.826, + "data": 60633.808, + "framework": 2642218.2739999997, + "kernel_overhead": 1245498.89, + "profiling_overhead": 50850.34, + "profiling_runs": 1285235.236, "runtimes": [ - 6289.568 + 5631.296 ], - "search_algorithm": 23.22, - "validation": 16.499 + "search_algorithm": 23.971, + "validation": 16.965 }, - "timestamp": "2026-01-27 09:25:6 UTC" + "timestamp": "2026-03-02 14:27:20 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 22 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -8306,61 +8935,61 @@ { "name": "time", "unit": "", - "value": 6101.152 + "value": 5729.184 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.08864585623214 + "value": 8.561942755380596 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2108616.0 + "value": 452.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871480.0 + "value": 1870364.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.02559165757116 + "value": 1.1024124613080444 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2197426.0 + "value": 96152.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102827.0 + "value": 2099278.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.613077505219231 + "value": 3.624717112803529 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.15936527281170798 + "value": 0.1698702054357429 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -8390,13 +9019,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.6631450823399 + "value": 98.68932169450795 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97348056177815 + "value": 99.97322007115042 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -8408,7 +9037,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -8420,7 +9049,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 9781116928.0 + "value": 10049552384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -8432,13 +9061,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1665138688.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -8450,13 +9079,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 788135936.0 + "value": 786956288.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.387779151626592 + "value": 33.82699861229088 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -8468,13 +9097,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.205554653457398 + "value": 28.998947625241282 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9925943349700241 + "value": 2.010669220109503 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -8486,7 +9115,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.87686341048293 + "value": 85.01490755761893 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -8505,21 +9134,38 @@ "time" ], "times": { - "compilation": 13416.071, - "data": 62867.898, - "framework": 2631977.717, - "kernel_overhead": 1237222.732, - "profiling_overhead": 52661.391, - "profiling_runs": 1279225.696, + "compilation": 14112.735, + "data": 64938.396, + "framework": 2650753.721, + "kernel_overhead": 1245459.925, + "profiling_overhead": 55161.518, + "profiling_runs": 1285193.882, "runtimes": [ - 6101.152 + 5729.184 ], - "search_algorithm": 24.035, - "validation": 15.018 + "search_algorithm": 21.763, + "validation": 17.532 }, - "timestamp": "2026-01-27 09:25:8 UTC" + "timestamp": "2026-03-02 14:27:21 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 22 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -8536,61 +9182,61 @@ { "name": "time", "unit": "", - "value": 6066.816 + "value": 5733.312 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.021339811835983 + "value": 8.46964515845586 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104252.0 + "value": 12304.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837204.0 + "value": 1838388.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.026055794173669 + "value": 1.1085582892731665 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2204246.0 + "value": 106189.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100945.0 + "value": 2101548.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.6130771390551994 + "value": 3.6245812309074075 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1593656345089629 + "value": 0.16986547961735202 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -8620,13 +9266,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.97606495883939 + "value": 98.90376443190702 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97376724268435 + "value": 99.97090632039065 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -8638,7 +9284,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -8650,7 +9296,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 9781116928.0 + "value": 10049552384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -8662,13 +9308,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1665138688.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -8680,13 +9326,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 788135936.0 + "value": 786956288.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.388014737507046 + "value": 33.82766003376139 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -8698,13 +9344,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.205538385724147 + "value": 28.998812009484638 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9925931434856554 + "value": 2.010659817063876 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -8716,7 +9362,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.87679750622135 + "value": 85.01449107394457 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -8735,21 +9381,38 @@ "time" ], "times": { - "compilation": 13501.712, - "data": 62800.249, - "framework": 2636304.635, - "kernel_overhead": 1239649.319, - "profiling_overhead": 52746.612, - "profiling_runs": 1281108.455, + "compilation": 14569.801, + "data": 63214.676, + "framework": 2652139.545, + "kernel_overhead": 1247636.846, + "profiling_overhead": 53559.288, + "profiling_runs": 1287728.735, "runtimes": [ - 6066.816 + 5733.312 ], - "search_algorithm": 23.734, - "validation": 13.416 + "search_algorithm": 27.741, + "validation": 16.008 }, - "timestamp": "2026-01-27 09:25:9 UTC" + "timestamp": "2026-03-02 14:27:23 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 22 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -8766,61 +9429,61 @@ { "name": "time", "unit": "", - "value": 6071.776 + "value": 5702.176 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.05804454898232 + "value": 8.42452904790356 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2110136.0 + "value": 1488.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839732.0 + "value": 1839432.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.0310810750738773 + "value": 1.1055722680696183 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2210176.0 + "value": 98455.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102700.0 + "value": 2103070.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.6131660458607695 + "value": 3.624700573383649 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.15937069327548536 + "value": 0.16986132933285572 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -8850,13 +9513,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.74242570639092 + "value": 98.65745623607953 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97622850998778 + "value": 99.96732969934732 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -8868,7 +9531,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -8880,7 +9543,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 9781116928.0 + "value": 10049552384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -8892,13 +9555,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1665138688.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -8910,13 +9573,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 788135936.0 + "value": 786956288.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.38805004693271 + "value": 33.82749552994452 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -8928,13 +9591,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.205732193593988 + "value": 28.999140979352028 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9926073383979972 + "value": 2.010682626498041 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -8946,7 +9609,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.87738467716619 + "value": 85.0154744051495 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -8965,21 +9628,38 @@ "time" ], "times": { - "compilation": 14062.015, - "data": 64368.068, - "framework": 2645266.66, - "kernel_overhead": 1243019.204, - "profiling_overhead": 53358.543, - "profiling_runs": 1284520.845, + "compilation": 14911.855, + "data": 68516.999, + "framework": 2660516.805, + "kernel_overhead": 1247332.125, + "profiling_overhead": 56986.308, + "profiling_runs": 1287681.373, "runtimes": [ - 6071.776 + 5702.176 ], - "search_algorithm": 22.344, - "validation": 12.409 + "search_algorithm": 25.773, + "validation": 18.631 }, - "timestamp": "2026-01-27 09:25:11 UTC" + "timestamp": "2026-03-02 14:27:24 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 22 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -8996,61 +9676,61 @@ { "name": "time", "unit": "", - "value": 6163.232 + "value": 5702.368 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.917343412881817 + "value": 8.507094919065802 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2113072.0 + "value": 16200.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841288.0 + "value": 1843176.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.040531597464955 + "value": 1.1123020992377899 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2211166.0 + "value": 110576.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105540.0 + "value": 2106498.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.621036173480606 + "value": 3.6210866215057917 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.15972317249761425 + "value": 0.169701553895452 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -9080,13 +9760,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.47825269324801 + "value": 98.54019756391658 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97502822500704 + "value": 99.97258160439574 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -9098,7 +9778,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -9110,7 +9790,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 9781116928.0 + "value": 10049552384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -9122,13 +9802,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1665138688.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -9140,13 +9820,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 788135936.0 + "value": 786956288.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.45092042393331 + "value": 33.79399985466882 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -9158,13 +9838,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.26623030127273 + "value": 28.970341734395117 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9970383521439987 + "value": 2.0086858038496613 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -9176,7 +9856,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 80.0550221201698 + "value": 84.93103610722204 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -9195,21 +9875,38 @@ "time" ], "times": { - "compilation": 13119.551, - "data": 65073.81, - "framework": 2646378.6610000003, - "kernel_overhead": 1242182.133, - "profiling_overhead": 55140.34, - "profiling_runs": 1283982.378, + "compilation": 18844.701, + "data": 62463.765, + "framework": 2660535.871, + "kernel_overhead": 1252560.574, + "profiling_overhead": 52689.486, + "profiling_runs": 1292822.046, "runtimes": [ - 6163.232 + 5702.368 ], - "search_algorithm": 23.264, - "validation": 14.273 + "search_algorithm": 32.311, + "validation": 24.072 }, - "timestamp": "2026-01-27 09:25:12 UTC" + "timestamp": "2026-03-02 14:27:25 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 32, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -9226,61 +9923,61 @@ { "name": "time", "unit": "", - "value": 8372.927 + "value": 8060.864 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.50791356252932 + "value": 6.184620592042736 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098096.0 + "value": 2840.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1929188.0 + "value": 1929564.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.22737008075276 + "value": 48.5773008991906 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2986769.0 + "value": 858275.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138418787.0 + "value": 138421889.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.3869886120799175 + "value": 1.252531365625944 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0576859511658942 + "value": 0.0587562186414145 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -9310,13 +10007,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.80347159782782 + "value": 90.72727249119923 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.8767474347051 + "value": 99.7135115837206 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -9328,7 +10025,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -9340,7 +10037,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3709861888.0 + "value": 3982491648.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -9352,13 +10049,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2336227328.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 427819008.0 + "value": 155189248.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -9370,13 +10067,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 396296192.0 + "value": 395247616.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.716743705108334 + "value": 10.51608919320568 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -9388,13 +10085,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.71443654673584 + "value": 20.113077600418634 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.752453917335906 + "value": 10.891310087336068 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -9406,7 +10103,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.10506707660855 + "value": 29.61504097167848 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -9425,21 +10122,38 @@ "time" ], "times": { - "compilation": 13807.447, - "data": 60981.938, - "framework": 897746.2690000001, - "kernel_overhead": 366328.469, - "profiling_overhead": 50922.915, - "profiling_runs": 419512.947, + "compilation": 60794.613, + "data": 63849.84, + "framework": 909841.787, + "kernel_overhead": 369762.414, + "profiling_overhead": 53552.057, + "profiling_runs": 422677.476, "runtimes": [ - 8372.927 + 8060.864 ], - "search_algorithm": 36.353, - "validation": 14.051 + "search_algorithm": 32.709, + "validation": 25.389 }, - "timestamp": "2026-01-27 09:25:12 UTC" + "timestamp": "2026-03-02 14:27:26 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 32, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -9456,61 +10170,61 @@ { "name": "time", "unit": "", - "value": 8655.36 + "value": 8162.719 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.979340888875374 + "value": 6.242340999664487 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2101872.0 + "value": 18208.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1931988.0 + "value": 1934612.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.04614392377234 + "value": 49.24921855338431 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 3376217.0 + "value": 1187902.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138424270.0 + "value": 138425285.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4151636575543471 + "value": 1.276159495206637 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.059314743831312494 + "value": 0.05955755108248684 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -9540,13 +10254,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.47321787140652 + "value": 97.53313421125242 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.9189829378639 + "value": 99.6917646515126 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -9558,7 +10272,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -9570,7 +10284,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3709861888.0 + "value": 3982491648.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -9582,13 +10296,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2336227328.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 427819008.0 + "value": 155189248.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -9600,13 +10314,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 396296192.0 + "value": 395247616.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.8894086491397 + "value": 10.662595036670414 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -9618,13 +10332,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.061735303278123 + "value": 20.39183227142029 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.941874186407063 + "value": 11.042256830568897 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -9636,7 +10350,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.61781621006878 + "value": 30.025508635859254 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -9655,21 +10369,38 @@ "time" ], "times": { - "compilation": 13102.598, - "data": 65294.618, - "framework": 912863.9619999999, - "kernel_overhead": 369096.882, - "profiling_overhead": 55058.911, - "profiling_runs": 423413.551, + "compilation": 24018.314, + "data": 62821.551, + "framework": 915942.1410000001, + "kernel_overhead": 373641.612, + "profiling_overhead": 53095.801, + "profiling_runs": 426383.177, "runtimes": [ - 8655.36 + 8162.719 ], - "search_algorithm": 24.239, - "validation": 16.289 + "search_algorithm": 34.306, + "validation": 25.689 }, - "timestamp": "2026-01-27 09:25:13 UTC" + "timestamp": "2026-03-02 14:27:27 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 32, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -9686,61 +10417,61 @@ { "name": "time", "unit": "", - "value": 8136.192 + "value": 8385.44 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.833998928504242 + "value": 6.203995753375101 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2109064.0 + "value": 18788.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1907760.0 + "value": 1912904.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.49819756868054 + "value": 48.91948830368562 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2803090.0 + "value": 702128.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138416730.0 + "value": 138425531.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4172562677691027 + "value": 1.2685177925548012 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05929601329373381 + "value": 0.05949840059312468 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -9770,13 +10501,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.82169696189482 + "value": 91.49477160675931 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.62058571811028 + "value": 98.88878759710131 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -9788,7 +10519,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -9800,7 +10531,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3709861888.0 + "value": 3982491648.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -9812,13 +10543,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2336227328.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 427819008.0 + "value": 155189248.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -9830,13 +10561,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 396296192.0 + "value": 395247616.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.914244008578772 + "value": 10.738578719287267 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -9848,13 +10579,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.114875824346296 + "value": 20.53699706097775 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.97085756630606 + "value": 11.12086413116422 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -9866,7 +10597,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.696248821107563 + "value": 30.239232707174757 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -9885,21 +10616,38 @@ "time" ], "times": { - "compilation": 13037.085, - "data": 63156.816, - "framework": 906775.8049999999, - "kernel_overhead": 369340.913, - "profiling_overhead": 52916.404, - "profiling_runs": 421361.672, + "compilation": 25329.644, + "data": 62504.348, + "framework": 911986.6509999998, + "kernel_overhead": 371833.562, + "profiling_overhead": 52616.098, + "profiling_runs": 425032.643, "runtimes": [ - 8136.192 + 8385.44 ], - "search_algorithm": 21.467, - "validation": 12.266 + "search_algorithm": 35.903, + "validation": 27.142 }, - "timestamp": "2026-01-27 09:25:13 UTC" + "timestamp": "2026-03-02 14:27:27 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 32, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -9916,61 +10664,61 @@ { "name": "time", "unit": "", - "value": 8091.679 + "value": 8036.992 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.907896705973037 + "value": 6.2623527480153 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2117484.0 + "value": 23328.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1912192.0 + "value": 1917316.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.70830635768764 + "value": 49.17345212913307 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 3038058.0 + "value": 908566.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138420407.0 + "value": 138429922.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4227037380725192 + "value": 1.2772747716529027 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05944712864152063 + "value": 0.05970321409833485 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -10000,13 +10748,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.66539987271042 + "value": 97.63809629812862 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.52863262992805 + "value": 98.85814422132965 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -10018,7 +10766,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -10030,7 +10778,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3709861888.0 + "value": 3982491648.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -10042,13 +10790,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2336227328.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 427819008.0 + "value": 155189248.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -10060,13 +10808,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 396296192.0 + "value": 395247616.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.049757274126256 + "value": 10.7796138793179 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -10078,13 +10826,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.387386061811686 + "value": 20.614080144245097 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.119487417501784 + "value": 11.162604921859284 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -10096,7 +10844,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.098585708009967 + "value": 30.35275266846908 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -10115,21 +10863,38 @@ "time" ], "times": { - "compilation": 13425.064, - "data": 61751.747, - "framework": 903025.9639999999, - "kernel_overhead": 368611.46, - "profiling_overhead": 51648.84, - "profiling_runs": 421013.917, + "compilation": 27418.82, + "data": 63688.572, + "framework": 917240.882, + "kernel_overhead": 373512.651, + "profiling_overhead": 53852.778, + "profiling_runs": 426186.881, "runtimes": [ - 8091.679 + 8036.992 ], - "search_algorithm": 24.834, - "validation": 13.499 + "search_algorithm": 37.781, + "validation": 29.368 }, - "timestamp": "2026-01-27 09:25:14 UTC" + "timestamp": "2026-03-02 14:27:28 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 32, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -10146,61 +10911,61 @@ { "name": "time", "unit": "", - "value": 8199.263 + "value": 8009.504 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.697101708398959 + "value": 6.194931691947496 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2132144.0 + "value": 800.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1912856.0 + "value": 1909372.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.67212986098539 + "value": 49.188753134267664 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2849497.0 + "value": 698305.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138419779.0 + "value": 138417838.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4247375382530494 + "value": 1.2768598513929923 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.059642775305308836 + "value": 0.05984099622012652 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -10230,13 +10995,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.32627096462475 + "value": 96.41365317820662 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93215895197626 + "value": 99.98529108203337 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -10248,7 +11013,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -10260,7 +11025,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3709861888.0 + "value": 3982491648.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -10272,13 +11037,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2336227328.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 427819008.0 + "value": 155189248.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -10290,13 +11055,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 396296192.0 + "value": 395247616.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.042002665450585 + "value": 10.682977412304158 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -10308,13 +11073,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.371887806402167 + "value": 20.42873155516926 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.111034511597277 + "value": 11.062237936856793 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -10326,7 +11091,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.07570503490274 + "value": 30.079840181358215 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -10345,21 +11110,38 @@ "time" ], "times": { - "compilation": 13285.056, - "data": 63038.863, - "framework": 907832.749, - "kernel_overhead": 369598.48, - "profiling_overhead": 52794.055, - "profiling_runs": 422401.351, + "compilation": 27499.251, + "data": 61914.431, + "framework": 910273.959, + "kernel_overhead": 371871.359, + "profiling_overhead": 51926.808, + "profiling_runs": 424561.361, "runtimes": [ - 8199.263 + 8009.504 ], - "search_algorithm": 22.2, - "validation": 18.276 + "search_algorithm": 38.191, + "validation": 28.099 }, - "timestamp": "2026-01-27 09:25:15 UTC" + "timestamp": "2026-03-02 14:27:28 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -10376,61 +11158,61 @@ { "name": "time", "unit": "", - "value": 8455.552 + "value": 8049.76 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.960110243988515 + "value": 6.5059229752296135 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2113768.0 + "value": 11880.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2022784.0 + "value": 2022748.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.49860892758552 + "value": 54.22798531432576 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 19447772.0 + "value": 17214324.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425493.0 + "value": 138420513.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.7821785225803504 + "value": 0.629185588519264 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029177966361281067 + "value": 0.029330501234110438 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -10460,13 +11242,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.55573293074143 + "value": 88.42199287317905 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.33535564326992 + "value": 98.41614763737418 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -10478,7 +11260,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -10490,7 +11272,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3499098112.0 + "value": 3635412992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -10502,13 +11284,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2269118464.0 + "value": 2252341248.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 77594624.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -10520,13 +11302,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 368082944.0 + "value": 367034368.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.78034561818413 + "value": 9.195118635589191 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -10538,13 +11320,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.05210018827511 + "value": 20.34519333446138 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.608618434568399 + "value": 10.684206753522078 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -10556,7 +11338,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.4960435574486 + "value": 27.81849368346201 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -10575,21 +11357,38 @@ "time" ], "times": { - "compilation": 13541.881, - "data": 61375.302, - "framework": 870898.0719999999, - "kernel_overhead": 352384.203, - "profiling_overhead": 51167.101, - "profiling_runs": 405971.466, + "compilation": 87761.675, + "data": 61438.336, + "framework": 872988.416, + "kernel_overhead": 353419.37, + "profiling_overhead": 51590.114, + "profiling_runs": 406540.596, "runtimes": [ - 8455.552 + 8049.76 ], - "search_algorithm": 42.072, - "validation": 16.435 + "search_algorithm": 36.389, + "validation": 26.36 }, - "timestamp": "2026-01-27 09:25:15 UTC" + "timestamp": "2026-03-02 14:27:29 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -10606,61 +11405,61 @@ { "name": "time", "unit": "", - "value": 8750.976 + "value": 8167.616 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.122179846204226 + "value": 6.480627915386742 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2101652.0 + "value": 19164.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2020696.0 + "value": 2022352.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.452670511307275 + "value": 54.508455844985804 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 20141873.0 + "value": 18176753.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138424134.0 + "value": 138421402.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.7853208275720851 + "value": 0.6316692278440132 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029459221896794143 + "value": 0.029444467155065903 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -10690,13 +11489,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.87277790260781 + "value": 89.84592561618177 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.4700829980253 + "value": 98.66422769867958 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -10708,7 +11507,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -10720,7 +11519,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3499098112.0 + "value": 3635412992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -10732,13 +11531,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2269118464.0 + "value": 2252341248.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 77594624.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -10750,13 +11549,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 368082944.0 + "value": 367034368.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.765424622710679 + "value": 9.207656559502038 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -10768,13 +11567,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.016733553682247 + "value": 20.37289168868039 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.589907619831404 + "value": 10.698752446863164 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -10786,7 +11585,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.447569089675888 + "value": 27.856386754387458 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -10805,21 +11604,38 @@ "time" ], "times": { - "compilation": 13166.119, - "data": 63835.1, - "framework": 874107.395, - "kernel_overhead": 350997.405, - "profiling_overhead": 53608.66, - "profiling_runs": 405666.23, + "compilation": 26257.373, + "data": 62407.61, + "framework": 882208.0659999999, + "kernel_overhead": 357116.584, + "profiling_overhead": 52374.961, + "profiling_runs": 410308.911, "runtimes": [ - 8750.976 + 8167.616 ], - "search_algorithm": 52.093, - "validation": 16.195 + "search_algorithm": 49.483, + "validation": 28.136 }, - "timestamp": "2026-01-27 09:25:16 UTC" + "timestamp": "2026-03-02 14:27:29 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -10836,61 +11652,61 @@ { "name": "time", "unit": "", - "value": 8153.599 + "value": 8186.847 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.043141583933185 + "value": 6.380957784145955 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2110224.0 + "value": 4508.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1999672.0 + "value": 2002000.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.00006790835147 + "value": 53.6866653460216 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 16676094.0 + "value": 15077355.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138417558.0 + "value": 138423319.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.7861217554769574 + "value": 0.6324981297719346 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029448851373770464 + "value": 0.029545391211106897 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -10920,13 +11736,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.98535185292054 + "value": 88.82228191852329 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.50594791665793 + "value": 99.57484724842796 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -10938,7 +11754,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -10950,7 +11766,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3499098112.0 + "value": 3635412992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -10962,13 +11778,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2269118464.0 + "value": 2252341248.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 77594624.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -10980,13 +11796,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 368082944.0 + "value": 367034368.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.846354361996726 + "value": 9.154037602257668 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -10998,13 +11814,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.203565339965447 + "value": 20.255771704201535 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.688751487232695 + "value": 10.63724729876404 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -11016,7 +11832,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.703738488896423 + "value": 27.69622343215775 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -11035,21 +11851,38 @@ "time" ], "times": { - "compilation": 13022.808, - "data": 62257.937, - "framework": 869658.561, - "kernel_overhead": 351405.027, - "profiling_overhead": 52256.104, - "profiling_runs": 403739.493, + "compilation": 28404.935, + "data": 61859.761, + "framework": 875972.183, + "kernel_overhead": 354547.332, + "profiling_overhead": 51995.282, + "profiling_runs": 407569.808, "runtimes": [ - 8153.599 + 8186.847 ], - "search_algorithm": 24.231, - "validation": 16.712 + "search_algorithm": 36.989, + "validation": 24.629 }, - "timestamp": "2026-01-27 09:25:16 UTC" + "timestamp": "2026-03-02 14:27:30 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -11066,61 +11899,61 @@ { "name": "time", "unit": "", - "value": 8434.784 + "value": 8147.968 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.935391472649185 + "value": 6.383817084516698 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103256.0 + "value": 5324.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1998716.0 + "value": 1997164.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.23330589891538 + "value": 53.92331861241032 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 17275965.0 + "value": 15772037.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138419030.0 + "value": 138415261.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.7897245646693194 + "value": 0.6337873329365352 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02943297756748656 + "value": 0.029581964092194813 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -11150,13 +11983,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.03306672492435 + "value": 90.22455370436917 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.06716168106088 + "value": 99.58025848902223 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -11168,7 +12001,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -11180,7 +12013,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3499098112.0 + "value": 3635412992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -11192,13 +12025,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2269118464.0 + "value": 2252341248.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 77594624.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -11210,13 +12043,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 368082944.0 + "value": 367034368.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.881340760663099 + "value": 9.164616694962866 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -11228,13 +12061,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.28211199868471 + "value": 20.279743321310942 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.730306811804141 + "value": 10.649835909213827 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -11246,7 +12079,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.811464286890814 + "value": 27.72902255952332 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -11265,21 +12098,38 @@ "time" ], "times": { - "compilation": 13442.369, - "data": 64343.311, - "framework": 872577.013, - "kernel_overhead": 350758.324, - "profiling_overhead": 53655.935, - "profiling_runs": 403819.443, + "compilation": 27597.985, + "data": 63292.263, + "framework": 883302.536, + "kernel_overhead": 357095.196, + "profiling_overhead": 53054.477, + "profiling_runs": 409860.6, "runtimes": [ - 8434.784 + 8147.968 ], - "search_algorithm": 22.346, - "validation": 12.671 + "search_algorithm": 39.042, + "validation": 24.502 }, - "timestamp": "2026-01-27 09:25:17 UTC" + "timestamp": "2026-03-02 14:27:30 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -11296,61 +12146,61 @@ { "name": "time", "unit": "", - "value": 8152.672 + "value": 8075.104 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.0121243405383 + "value": 6.527708701358737 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098848.0 + "value": 20364.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1998976.0 + "value": 2004032.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 52.23717955696912 + "value": 51.61483628154505 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 10515812.0 + "value": 8478368.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138414975.0 + "value": 138419092.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.7955638478044696 + "value": 0.64344332703454 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029637607171316877 + "value": 0.029748484408167606 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -11380,13 +12230,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.447162031492 + "value": 85.09917633866216 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.27520216246478 + "value": 98.43864767585366 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -11398,7 +12248,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -11410,7 +12260,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3499098112.0 + "value": 3635412992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -11422,13 +12272,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2269118464.0 + "value": 2252341248.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 77594624.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -11440,13 +12290,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 368082944.0 + "value": 367034368.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.924155834174288 + "value": 9.323495025060293 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -11458,13 +12308,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.380322633348193 + "value": 20.63041210824209 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.782265416617562 + "value": 10.833988389850767 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -11476,7 +12326,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.94613392279735 + "value": 28.20849745906205 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -11495,21 +12345,38 @@ "time" ], "times": { - "compilation": 13559.499, - "data": 65325.797, - "framework": 875179.474, - "kernel_overhead": 351249.128, - "profiling_overhead": 54970.814, - "profiling_runs": 403633.735, + "compilation": 31167.21, + "data": 61774.972, + "framework": 878569.583, + "kernel_overhead": 356038.105, + "profiling_overhead": 51764.166, + "profiling_runs": 408992.34, "runtimes": [ - 8152.672 + 8075.104 ], - "search_algorithm": 21.202, - "validation": 14.168 + "search_algorithm": 53.32, + "validation": 29.118 }, - "timestamp": "2026-01-27 09:25:17 UTC" + "timestamp": "2026-03-02 14:27:31 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -11526,61 +12393,61 @@ { "name": "time", "unit": "", - "value": 9774.304 + "value": 9279.456 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.612756346823176 + "value": 6.36942070979492 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2117372.0 + "value": 27528.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2238768.0 + "value": 2309228.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 75.34743063764462 + "value": 74.70212450091971 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 119207773.0 + "value": 109139973.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138418833.0 + "value": 138429471.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4000727671414845 + "value": 0.2762158037647 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012368593756920091 + "value": 0.012735454857528214 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -11610,13 +12477,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.73171676182082 + "value": 92.08386443727893 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.79079513359048 + "value": 97.82728539748061 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -11628,7 +12495,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -11640,7 +12507,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3393716224.0 + "value": 3461873664.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -11652,13 +12519,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2235564032.0 + "value": 2218786816.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 38797312.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -11670,13 +12537,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 353976320.0 + "value": 352927744.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.08579249655818 + "value": 7.402098991329166 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -11688,13 +12555,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.268755530325887 + "value": 17.774326417722506 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.994846172839424 + "value": 9.188753952521338 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -11706,7 +12573,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.771946388020933 + "value": 23.369212426757887 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -11725,21 +12592,38 @@ "time" ], "times": { - "compilation": 14201.861, - "data": 61217.71, - "framework": 854834.784, - "kernel_overhead": 340739.437, - "profiling_overhead": 50833.033, - "profiling_runs": 402044.604, + "compilation": 135374.874, + "data": 61325.446, + "framework": 866369.596, + "kernel_overhead": 346332.829, + "profiling_overhead": 51428.395, + "profiling_runs": 407282.926, "runtimes": [ - 9774.304 + 9279.456 ], - "search_algorithm": 23.84, - "validation": 16.791 + "search_algorithm": 42.718, + "validation": 31.837 }, - "timestamp": "2026-01-27 09:25:18 UTC" + "timestamp": "2026-03-02 14:27:31 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -11756,61 +12640,61 @@ { "name": "time", "unit": "", - "value": 9996.544 + "value": 9508.896 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.438098011621879 + "value": 6.231955657671571 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2117660.0 + "value": 8092.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2231028.0 + "value": 2305880.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 76.89619322483003 + "value": 77.20380121387784 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 126036574.0 + "value": 121061452.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425775.0 + "value": 138448087.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3987601760645192 + "value": 0.27182010814509167 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012295247310574745 + "value": 0.01256859051542329 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -11840,13 +12724,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.49998387877464 + "value": 95.23530513627956 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.45157648474316 + "value": 98.51294660713825 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -11858,7 +12742,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -11870,7 +12754,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3393716224.0 + "value": 3461873664.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -11882,13 +12766,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2235564032.0 + "value": 2218786816.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 38797312.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -11900,13 +12784,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 353976320.0 + "value": 352927744.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.994033868671147 + "value": 7.252872710214968 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -11918,13 +12802,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.05113477518177 + "value": 17.419350628256574 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.881493174524 + "value": 9.005242909016918 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -11936,7 +12820,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.48498880982553 + "value": 22.902517853950734 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -11955,21 +12839,38 @@ "time" ], "times": { - "compilation": 14077.196, - "data": 62236.684, - "framework": 863000.2660000001, - "kernel_overhead": 343024.501, - "profiling_overhead": 52066.906, - "profiling_runs": 405672.175, + "compilation": 31465.696, + "data": 60791.522, + "framework": 869611.179, + "kernel_overhead": 348759.284, + "profiling_overhead": 50714.168, + "profiling_runs": 409346.205, "runtimes": [ - 9996.544 + 9508.896 ], - "search_algorithm": 21.145, - "validation": 16.394 + "search_algorithm": 40.48, + "validation": 31.964 }, - "timestamp": "2026-01-27 09:25:18 UTC" + "timestamp": "2026-03-02 14:27:32 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -11986,61 +12887,61 @@ { "name": "time", "unit": "", - "value": 9523.776 + "value": 9398.528 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.351738699431401 + "value": 6.2676507097324405 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2100760.0 + "value": 20564.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2171940.0 + "value": 2199480.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 76.11292013196372 + "value": 72.92190877119107 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 115629748.0 + "value": 98308535.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415100.0 + "value": 138424881.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4130193766636857 + "value": 0.28054295223748316 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012695766959774816 + "value": 0.013036858534680017 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -12070,13 +12971,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.40843014643502 + "value": 89.10195353762236 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.94098055334575 + "value": 98.45355258691886 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -12088,7 +12989,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -12100,7 +13001,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3393716224.0 + "value": 3461873664.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -12112,13 +13013,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2235564032.0 + "value": 2218786816.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 38797312.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -12130,13 +13031,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 353976320.0 + "value": 352927744.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.188289540631859 + "value": 7.52621057339055 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -12148,13 +13049,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.519488613786926 + "value": 18.079243512960602 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.125446522830666 + "value": 9.346386264329608 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -12166,7 +13067,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.102582725431247 + "value": 23.770109192659916 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -12185,21 +13086,38 @@ "time" ], "times": { - "compilation": 13466.424, - "data": 63757.121, - "framework": 850635.391, - "kernel_overhead": 336957.182, - "profiling_overhead": 53306.218, - "profiling_runs": 396614.87, + "compilation": 30296.05, + "data": 61171.618, + "framework": 867303.194, + "kernel_overhead": 347413.618, + "profiling_overhead": 51298.714, + "profiling_runs": 407419.244, "runtimes": [ - 9523.776 + 9398.528 ], - "search_algorithm": 22.312, - "validation": 20.607 + "search_algorithm": 41.653, + "validation": 29.863 }, - "timestamp": "2026-01-27 09:25:19 UTC" + "timestamp": "2026-03-02 14:27:32 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -12216,61 +13134,61 @@ { "name": "time", "unit": "", - "value": 9734.624 + "value": 9509.792 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.089535543231662 + "value": 5.8511307814775 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103124.0 + "value": 5716.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2176436.0 + "value": 2195852.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 76.94170572596197 + "value": 76.37583480007939 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 123082181.0 + "value": 118220948.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138416228.0 + "value": 138415459.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.40818227241852506 + "value": 0.27378220858938157 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012532737032885523 + "value": 0.012624479235094637 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -12300,13 +13218,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.32791942052192 + "value": 94.42755948731518 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.1036154947303 + "value": 98.72570417019179 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -12318,7 +13236,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -12330,7 +13248,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3393716224.0 + "value": 3461873664.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -12342,13 +13260,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2235564032.0 + "value": 2218786816.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 38797312.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -12360,13 +13278,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 353976320.0 + "value": 352927744.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.157666155515047 + "value": 7.270002990692716 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -12378,13 +13296,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.442133546871013 + "value": 17.459102936288257 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.085154277892896 + "value": 9.025793571189059 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -12396,7 +13314,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.000591033490107 + "value": 22.954783174504904 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -12415,21 +13333,38 @@ "time" ], "times": { - "compilation": 13973.063, - "data": 62627.654, - "framework": 867696.3089999999, - "kernel_overhead": 345764.783, - "profiling_overhead": 52456.835, - "profiling_runs": 406847.037, + "compilation": 30757.453, + "data": 61056.166, + "framework": 869377.564, + "kernel_overhead": 348348.615, + "profiling_overhead": 51026.657, + "profiling_runs": 408946.126, "runtimes": [ - 9734.624 + 9509.792 ], - "search_algorithm": 22.146, - "validation": 16.66 + "search_algorithm": 39.567, + "validation": 30.15 }, - "timestamp": "2026-01-27 09:25:19 UTC" + "timestamp": "2026-03-02 14:27:33 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -12446,61 +13381,61 @@ { "name": "time", "unit": "", - "value": 8610.784 + "value": 8615.424 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.919931376454635 + "value": 6.742860837925183 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2116484.0 + "value": 21212.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2200832.0 + "value": 2206120.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 61.23147621858555 + "value": 61.328921800421945 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 48094637.0 + "value": 47492954.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138420000.0 + "value": 138420473.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.45059930821840216 + "value": 0.2991866994355979 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.013965839474813516 + "value": 0.01396465451390183 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -12530,13 +13465,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.9609686709972 + "value": 76.62800425837503 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.50397032234108 + "value": 99.01881361884294 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -12548,7 +13483,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -12560,7 +13495,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3393716224.0 + "value": 3461873664.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -12572,13 +13507,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2235564032.0 + "value": 2218786816.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 38797312.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -12590,13 +13525,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 353976320.0 + "value": 352927744.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.942938322994504 + "value": 8.017007825071438 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -12608,13 +13543,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.357621931936166 + "value": 19.25533906082439 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.082882419869582 + "value": 9.95438976105851 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -12626,7 +13561,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.526506933645955 + "value": 25.316425167542377 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -12645,21 +13580,38 @@ "time" ], "times": { - "compilation": 13179.006, - "data": 64160.457, - "framework": 858224.015, - "kernel_overhead": 342555.254, - "profiling_overhead": 53899.625, - "profiling_runs": 397608.679, + "compilation": 31116.423, + "data": 61932.267, + "framework": 866456.7849999999, + "kernel_overhead": 348112.495, + "profiling_overhead": 51884.067, + "profiling_runs": 404527.956, "runtimes": [ - 8610.784 + 8615.424 ], - "search_algorithm": 32.498, - "validation": 13.262 + "search_algorithm": 52.544, + "validation": 32.881 }, - "timestamp": "2026-01-27 09:25:20 UTC" + "timestamp": "2026-03-02 14:27:33 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 23 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -12676,61 +13628,61 @@ { "name": "time", "unit": "", - "value": 3487.04 + "value": 3352.448 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.85301702529693 + "value": 14.463997404861429 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097328.0 + "value": 32.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1866312.0 + "value": 1869096.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5031290049981583 + "value": 1.8189742325960938 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2140357.0 + "value": 57792.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098982.0 + "value": 2098973.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.375503953595346 + "value": 6.059798653871545 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.28119387256858003 + "value": 0.28393197024190536 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -12760,13 +13712,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.35495715796856 + "value": 98.33114496342763 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96311107478337 + "value": 99.94874611876746 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -12778,7 +13730,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -12790,7 +13742,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4680843264.0 + "value": 4945084416.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -12802,13 +13754,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1933574144.0 + "value": 1665138688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -12820,13 +13772,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 486146048.0 + "value": 484966400.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.570246381220834 + "value": 41.380837478655295 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -12838,13 +13790,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.00813060176659 + "value": 48.48257212121867 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.5162205030590763 + "value": 3.3615845904360606 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -12856,7 +13808,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.9450724196685 + "value": 87.59124507612215 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -12875,21 +13827,38 @@ "time" ], "times": { - "compilation": 13963.879, - "data": 63861.373, - "framework": 1436027.9610000001, - "kernel_overhead": 645053.407, - "profiling_overhead": 53277.428, - "profiling_runs": 673835.753, + "compilation": 94634.438, + "data": 60083.152, + "framework": 1356435.458, + "kernel_overhead": 608405.435, + "profiling_overhead": 49980.133, + "profiling_runs": 637966.738, "runtimes": [ - 3487.04 + 3352.448 ], - "search_algorithm": 32.068, - "validation": 14.139 + "search_algorithm": 48.021, + "validation": 30.752 }, - "timestamp": "2026-01-27 09:25:20 UTC" + "timestamp": "2026-03-02 14:27:34 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 23 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -12906,61 +13875,61 @@ { "name": "time", "unit": "", - "value": 3480.96 + "value": 3396.768 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.664719815659407 + "value": 14.36681754533704 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2107720.0 + "value": 6384.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871852.0 + "value": 1871628.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.505854803404582 + "value": 1.8164946909438755 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2153279.0 + "value": 64414.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104793.0 + "value": 2099947.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.375396647394928 + "value": 6.059694514856238 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2811768859007743 + "value": 0.2839488485375063 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -12990,13 +13959,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.84719217712846 + "value": 98.00250437119865 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96128844508415 + "value": 99.95648274626 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -13008,7 +13977,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -13020,7 +13989,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4680843264.0 + "value": 4945084416.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -13032,13 +14001,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1933574144.0 + "value": 1665138688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -13050,13 +14019,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 486146048.0 + "value": 484966400.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.5680981508636 + "value": 41.380733644986755 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -13068,13 +14037,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.00610577039708 + "value": 48.48170138874522 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.5160721999802544 + "value": 3.361524217383702 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -13086,7 +14055,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.94143664745035 + "value": 87.58970447311066 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -13105,21 +14074,38 @@ "time" ], "times": { - "compilation": 13296.517, - "data": 63161.365, - "framework": 1429795.178, - "kernel_overhead": 642451.111, - "profiling_overhead": 52748.286, - "profiling_runs": 671434.416, + "compilation": 33275.84, + "data": 60925.981, + "framework": 1360662.2230000002, + "kernel_overhead": 609757.425, + "profiling_overhead": 50514.875, + "profiling_runs": 639463.942, "runtimes": [ - 3480.96 + 3396.768 ], - "search_algorithm": 24.954, - "validation": 16.672 + "search_algorithm": 45.832, + "validation": 34.964 }, - "timestamp": "2026-01-27 09:25:21 UTC" + "timestamp": "2026-03-02 14:27:35 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 23 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -13136,61 +14122,61 @@ { "name": "time", "unit": "", - "value": 3488.672 + "value": 3398.912 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.634384279433885 + "value": 14.098026663663783 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097348.0 + "value": 5796.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837112.0 + "value": 1837080.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.515352898225868 + "value": 1.8155537973196338 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2155197.0 + "value": 62741.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098996.0 + "value": 2100390.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.376538574298227 + "value": 6.059621764124358 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.28120114400442625 + "value": 0.28394497734193946 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -13220,13 +14206,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.43270982332766 + "value": 98.406990527254 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95019144928148 + "value": 99.95566663379624 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -13238,7 +14224,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -13250,7 +14236,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4680843264.0 + "value": 4945084416.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -13262,13 +14248,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1933574144.0 + "value": 1665138688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -13280,13 +14266,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 486146048.0 + "value": 484966400.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.5765944587132 + "value": 41.380419386512884 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -13298,13 +14284,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.01557777349685 + "value": 48.481436252365604 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.516765950207289 + "value": 3.361505833904256 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -13316,7 +14302,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.95855800203792 + "value": 87.58919223139921 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -13335,21 +14321,38 @@ "time" ], "times": { - "compilation": 13664.801, - "data": 64601.184, - "framework": 1434993.831, - "kernel_overhead": 643658.882, - "profiling_overhead": 54509.053, - "profiling_runs": 672224.712, + "compilation": 29809.872, + "data": 60797.85, + "framework": 1360072.788, + "kernel_overhead": 609864.14, + "profiling_overhead": 50400.237, + "profiling_runs": 639010.561, "runtimes": [ - 3488.672 + 3398.912 ], - "search_algorithm": 26.141, - "validation": 14.403 + "search_algorithm": 41.464, + "validation": 19.985 }, - "timestamp": "2026-01-27 09:25:22 UTC" + "timestamp": "2026-03-02 14:27:36 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 23 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -13366,61 +14369,61 @@ { "name": "time", "unit": "", - "value": 3534.72 + "value": 3403.808 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.412015199161424 + "value": 14.142925712778224 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2108924.0 + "value": 11064.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841792.0 + "value": 1841700.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.513222429859306 + "value": 1.822262754727967 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2168478.0 + "value": 69995.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102004.0 + "value": 2108500.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.376710017234346 + "value": 6.059354063475773 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2812317460702538 + "value": 0.28392739425680136 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -13450,13 +14453,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.11717151129264 + "value": 97.93001345284247 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.962152374658 + "value": 99.94844728794808 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -13468,7 +14471,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -13480,7 +14483,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4680843264.0 + "value": 4945084416.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -13492,13 +14495,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1933574144.0 + "value": 1665138688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -13510,13 +14513,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 486146048.0 + "value": 484966400.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.57557262122285 + "value": 41.38019964626191 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -13528,13 +14531,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.01505722162164 + "value": 48.4819357058724 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.5167278238492408 + "value": 3.3615404639813877 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -13546,7 +14549,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.95764816923604 + "value": 87.59012780330195 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -13565,21 +14568,38 @@ "time" ], "times": { - "compilation": 14057.66, - "data": 62223.914, - "framework": 1431120.855, - "kernel_overhead": 643833.453, - "profiling_overhead": 52088.202, - "profiling_runs": 672975.286, + "compilation": 28356.265, + "data": 61497.455, + "framework": 1357287.657, + "kernel_overhead": 607559.519, + "profiling_overhead": 51603.535, + "profiling_runs": 636627.148, "runtimes": [ - 3534.72 + 3403.808 ], - "search_algorithm": 25.59, - "validation": 14.5 + "search_algorithm": 41.388, + "validation": 28.226 }, - "timestamp": "2026-01-27 09:25:23 UTC" + "timestamp": "2026-03-02 14:27:37 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 23 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -13596,61 +14616,61 @@ { "name": "time", "unit": "", - "value": 3476.96 + "value": 3484.384 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.699699717836232 + "value": 13.99995281740578 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097416.0 + "value": 9784.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836668.0 + "value": 1841740.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5223575121466086 + "value": 1.8153360883506287 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2153722.0 + "value": 66549.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098946.0 + "value": 2104896.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.390624462355467 + "value": 6.059620144543105 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2818500538446453 + "value": 0.2839578051827152 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -13680,13 +14700,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.68227816281507 + "value": 97.80578403456192 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95747853525361 + "value": 99.95539130017573 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -13698,7 +14718,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -13710,7 +14730,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4680843264.0 + "value": 4945084416.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -13722,13 +14742,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1933574144.0 + "value": 1665138688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -13740,13 +14760,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 486146048.0 + "value": 484966400.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.66322784706977 + "value": 41.38190775977563 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -13758,13 +14778,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.12287173942776 + "value": 48.4837600595052 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.5246243949776193 + "value": 3.361666957250849 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -13776,7 +14796,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.15290667970875 + "value": 87.59340897025467 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -13795,21 +14815,38 @@ "time" ], "times": { - "compilation": 13119.314, - "data": 60890.935, - "framework": 1429569.8199999998, - "kernel_overhead": 644494.467, - "profiling_overhead": 50711.833, - "profiling_runs": 673472.585, + "compilation": 19745.86, + "data": 62509.65, + "framework": 1358657.7179999999, + "kernel_overhead": 607221.671, + "profiling_overhead": 52582.549, + "profiling_runs": 636343.848, "runtimes": [ - 3476.96 + 3484.384 ], - "search_algorithm": 29.613, - "validation": 14.748 + "search_algorithm": 43.755, + "validation": 23.941 }, - "timestamp": "2026-01-27 09:25:23 UTC" + "timestamp": "2026-03-02 14:27:37 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -13826,61 +14863,61 @@ { "name": "time", "unit": "", - "value": 4824.8 + "value": 4072.64 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 22.668488010851494 + "value": 12.117185704435762 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2107912.0 + "value": 16528.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870832.0 + "value": 1873024.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.782611938977219 + "value": 1.5405828752228645 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2169613.0 + "value": 87461.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104722.0 + "value": 2102184.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.6593910167979495 + "value": 2.5396878604707247 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11076771137439176 + "value": 0.1190117837937098 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -13910,13 +14947,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.90944692341333 + "value": 98.96617049643093 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95531865300585 + "value": 99.95881382234508 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -13928,7 +14965,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -13940,7 +14977,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5192548352.0 + "value": 5463080960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -13952,13 +14989,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1642070016.0 + "value": 1369440256.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -13970,13 +15007,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 597360640.0 + "value": 596246528.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.4667253154901 + "value": 40.96724832328605 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -13988,13 +15025,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.825613142586626 + "value": 40.63942669472938 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5329716263841258 + "value": 1.4882602549339372 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -14006,7 +15043,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.175438163473 + "value": 90.26848126282322 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -14025,21 +15062,38 @@ "time" ], "times": { - "compilation": 13252.476, - "data": 61599.321, - "framework": 2069413.264, - "kernel_overhead": 961267.714, - "profiling_overhead": 51143.373, - "profiling_runs": 995402.856, + "compilation": 48844.595, + "data": 63417.702, + "framework": 1939053.022, + "kernel_overhead": 894953.386, + "profiling_overhead": 53547.666, + "profiling_runs": 927134.268, "runtimes": [ - 4824.8 + 4072.64 ], - "search_algorithm": 38.046, - "validation": 17.62 + "search_algorithm": 29.811, + "validation": 21.877 }, - "timestamp": "2026-01-27 09:25:25 UTC" + "timestamp": "2026-03-02 14:27:38 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -14056,61 +15110,61 @@ { "name": "time", "unit": "", - "value": 4424.672 + "value": 4282.368 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 23.533760122149467 + "value": 11.928254757785467 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097552.0 + "value": 364.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1867928.0 + "value": 1870536.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.771643171654734 + "value": 1.5173546693220548 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2157641.0 + "value": 69956.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099283.0 + "value": 2099167.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.659349185150098 + "value": 2.539770535466052 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11078440370529113 + "value": 0.11901544934433993 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -14140,13 +15194,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.82184437042798 + "value": 98.86081955637958 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97590086481797 + "value": 99.96350169883706 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -14158,7 +15212,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -14170,7 +15224,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5192548352.0 + "value": 5463080960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -14182,13 +15236,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1642070016.0 + "value": 1369440256.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -14200,13 +15254,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 597360640.0 + "value": 596246528.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.46500766342329 + "value": 40.96599146614326 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -14218,13 +15272,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.82352494047872 + "value": 40.638772504445626 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5328869970994796 + "value": 1.4882362977702255 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -14236,7 +15290,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.17081597035677 + "value": 90.26705663391003 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -14255,21 +15309,38 @@ "time" ], "times": { - "compilation": 13685.975, - "data": 63637.703, - "framework": 2072399.37, - "kernel_overhead": 961256.995, - "profiling_overhead": 53274.698, - "profiling_runs": 994229.974, + "compilation": 18435.673, + "data": 65158.176, + "framework": 1944635.4619999998, + "kernel_overhead": 896169.613, + "profiling_overhead": 54587.422, + "profiling_runs": 928720.251, "runtimes": [ - 4424.672 + 4282.368 ], - "search_algorithm": 20.763, - "validation": 11.442 + "search_algorithm": 26.742, + "validation": 22.104 }, - "timestamp": "2026-01-27 09:25:26 UTC" + "timestamp": "2026-03-02 14:27:39 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -14286,61 +15357,61 @@ { "name": "time", "unit": "", - "value": 4582.72 + "value": 4194.976 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 22.98985856097925 + "value": 11.73957775560207 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2101668.0 + "value": 11352.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840964.0 + "value": 1840804.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.758738718226112 + "value": 1.5317490750287563 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2178317.0 + "value": 81991.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101741.0 + "value": 2105283.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.6596841450343 + "value": 2.5397596779144793 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1107943925486443 + "value": 0.11901621591099004 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -14370,13 +15441,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 99.03378122918278 + "value": 98.97112029932066 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9745469257261 + "value": 99.96374254350768 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -14388,7 +15459,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -14400,7 +15471,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5192548352.0 + "value": 5463080960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -14412,13 +15483,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1642070016.0 + "value": 1369440256.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -14430,13 +15501,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 597360640.0 + "value": 596246528.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.46835892035542 + "value": 40.96629499532065 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -14448,13 +15519,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.82744757159691 + "value": 40.638936342281184 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5330459709192108 + "value": 1.4882422976909613 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -14466,7 +15537,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.17951985658043 + "value": 90.26739208971271 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -14485,21 +15556,38 @@ "time" ], "times": { - "compilation": 13505.677, - "data": 61396.971, - "framework": 2069337.7449999999, - "kernel_overhead": 961579.173, - "profiling_overhead": 51072.823, - "profiling_runs": 995288.778, + "compilation": 17039.957, + "data": 66272.676, + "framework": 1947224.668, + "kernel_overhead": 895975.045, + "profiling_overhead": 56511.645, + "profiling_runs": 928465.302, "runtimes": [ - 4582.72 + 4194.976 ], - "search_algorithm": 24.484, - "validation": 11.333 + "search_algorithm": 23.727, + "validation": 21.614 }, - "timestamp": "2026-01-27 09:25:27 UTC" + "timestamp": "2026-03-02 14:27:40 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -14516,61 +15604,61 @@ { "name": "time", "unit": "", - "value": 4504.448 + "value": 4120.672 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 23.263110046220774 + "value": 11.722345764648278 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104908.0 + "value": 832.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837776.0 + "value": 1837832.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.7894021750464866 + "value": 1.5166473163703507 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2178263.0 + "value": 68945.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101455.0 + "value": 2099247.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.6595896563983126 + "value": 2.539837528305116 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11078930300788721 + "value": 0.11901301714560333 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -14600,13 +15688,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.92490397838861 + "value": 98.89204937762483 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96697672058704 + "value": 99.96224471717508 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -14618,7 +15706,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -14630,7 +15718,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5192548352.0 + "value": 5463080960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -14642,13 +15730,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1642070016.0 + "value": 1369440256.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -14660,13 +15748,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 597360640.0 + "value": 596246528.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.46924492879042 + "value": 40.96601866125311 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -14678,13 +15766,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.82857432915749 + "value": 40.638453015237516 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5330916354101911 + "value": 1.4882245977259834 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -14696,7 +15784,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.18205265879106 + "value": 90.26634698278957 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -14715,21 +15803,38 @@ "time" ], "times": { - "compilation": 13791.908, - "data": 59578.008, - "framework": 2067693.9349999998, - "kernel_overhead": 962573.737, - "profiling_overhead": 49558.49, - "profiling_runs": 995983.7, + "compilation": 15562.202, + "data": 65757.108, + "framework": 1946124.5209999997, + "kernel_overhead": 896283.391, + "profiling_overhead": 55884.839, + "profiling_runs": 928199.183, "runtimes": [ - 4504.448 + 4120.672 ], - "search_algorithm": 31.621, - "validation": 13.359 + "search_algorithm": 23.878, + "validation": 23.571 }, - "timestamp": "2026-01-27 09:25:28 UTC" + "timestamp": "2026-03-02 14:27:41 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -14746,61 +15851,61 @@ { "name": "time", "unit": "", - "value": 4449.952 + "value": 4162.56 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 22.903741383144318 + "value": 11.659318235004559 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2110244.0 + "value": 8476.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1844884.0 + "value": 1840912.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.7438412038273228 + "value": 1.5186864768902628 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2185983.0 + "value": 78395.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103306.0 + "value": 2108799.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.6412548109810827 + "value": 2.5330136797887377 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11003028877399144 + "value": 0.11870473886744588 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -14830,13 +15935,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.52074699954228 + "value": 98.09285254553501 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96914534300026 + "value": 99.95913079425816 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -14848,7 +15953,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -14860,7 +15965,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5192548352.0 + "value": 5463080960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -14872,13 +15977,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1642070016.0 + "value": 1369440256.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -14890,13 +15995,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 597360640.0 + "value": 596246528.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.239438941770835 + "value": 40.86027404219632 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -14908,13 +16013,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.5685968965678 + "value": 40.53445030798089 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5225554406323862 + "value": 1.4844159048332846 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -14926,7 +16031,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.60352901950677 + "value": 90.03532233059875 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -14945,21 +16050,38 @@ "time" ], "times": { - "compilation": 13489.687, - "data": 62959.229, - "framework": 2071432.8159999999, - "kernel_overhead": 960983.531, - "profiling_overhead": 52887.716, - "profiling_runs": 994602.34, + "compilation": 15915.696, + "data": 63089.127, + "framework": 1940546.814, + "kernel_overhead": 895968.485, + "profiling_overhead": 53221.368, + "profiling_runs": 928267.834, "runtimes": [ - 4449.952 + 4162.56 ], - "search_algorithm": 23.114, - "validation": 12.078 + "search_algorithm": 25.227, + "validation": 16.143 }, - "timestamp": "2026-01-27 09:25:29 UTC" + "timestamp": "2026-03-02 14:27:42 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -14976,61 +16098,61 @@ { "name": "time", "unit": "", - "value": 8418.336 + "value": 8098.688 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.906943179275737 + "value": 6.506627165331737 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2111436.0 + "value": 12932.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2023676.0 + "value": 2018868.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 56.640745962172765 + "value": 56.748502326790195 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 25843085.0 + "value": 24108366.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138424761.0 + "value": 138416940.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.7789486442692485 + "value": 0.6315267907101765 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029386342711573595 + "value": 0.029544178621103364 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -15060,13 +16182,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.6170714229253 + "value": 90.50131223155846 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95266517663146 + "value": 99.81901433149925 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -15078,7 +16200,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -15090,7 +16212,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1888485376.0 + "value": 1958739968.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -15102,13 +16224,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1195376640.0 + "value": 1178599424.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 143654912.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -15120,13 +16242,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 267419648.0 + "value": 266371072.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.78799168027154 + "value": 8.973244812625493 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -15138,13 +16260,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.07057699660463 + "value": 20.20539480753887 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.600749391386498 + "value": 5.559443346703199 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -15156,7 +16278,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.99491575655513 + "value": 20.050312164295043 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -15175,21 +16297,38 @@ "time" ], "times": { - "compilation": 13689.486, - "data": 63571.239, - "framework": 556157.3539999999, - "kernel_overhead": 192870.216, - "profiling_overhead": 53365.947, - "profiling_runs": 246349.952, + "compilation": 52489.988, + "data": 65656.503, + "framework": 563246.879, + "kernel_overhead": 194829.158, + "profiling_overhead": 55941.122, + "profiling_runs": 246820.096, "runtimes": [ - 8418.336 + 8098.688 ], - "search_algorithm": 22.646, - "validation": 13.233 + "search_algorithm": 40.751, + "validation": 18.608 }, - "timestamp": "2026-01-27 09:25:29 UTC" + "timestamp": "2026-03-02 14:27:43 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -15206,61 +16345,61 @@ { "name": "time", "unit": "", - "value": 8671.296 + "value": 8161.6 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.862577667618485 + "value": 6.439562550306105 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2109124.0 + "value": 9016.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2019216.0 + "value": 2018624.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 56.89212500146618 + "value": 57.160817003935385 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 26882997.0 + "value": 25645502.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138417484.0 + "value": 138417762.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.7795377872975848 + "value": 0.6315664294919627 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029251471553979426 + "value": 0.029642669774000697 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -15290,13 +16429,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.48382983354784 + "value": 91.76967295177583 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.4992650145235 + "value": 100.07676596301671 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -15308,7 +16447,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -15320,7 +16459,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1888485376.0 + "value": 1958739968.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -15332,13 +16471,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1195376640.0 + "value": 1178599424.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 143654912.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -15350,13 +16489,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 267419648.0 + "value": 266371072.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.787985790455073 + "value": 8.979819664862031 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -15368,13 +16507,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.06949958669058 + "value": 20.220540073403278 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.600448737203938 + "value": 5.563610513360716 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -15386,7 +16525,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.993861174159633 + "value": 20.065362804526647 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -15405,21 +16544,38 @@ "time" ], "times": { - "compilation": 13864.809, - "data": 63812.673, - "framework": 558104.856, - "kernel_overhead": 193812.824, - "profiling_overhead": 53309.338, - "profiling_runs": 247170.021, + "compilation": 18703.241, + "data": 65461.716, + "framework": 563504.991, + "kernel_overhead": 195130.064, + "profiling_overhead": 55374.945, + "profiling_runs": 247538.266, "runtimes": [ - 8671.296 + 8161.6 ], - "search_algorithm": 23.931, - "validation": 15.227 + "search_algorithm": 29.378, + "validation": 23.269 }, - "timestamp": "2026-01-27 09:25:30 UTC" + "timestamp": "2026-03-02 14:27:43 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -15436,61 +16592,61 @@ { "name": "time", "unit": "", - "value": 8326.815 + "value": 8129.952 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.001151387520393 + "value": 6.414574575106736 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2117676.0 + "value": 7648.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2003352.0 + "value": 2000120.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 55.84609618413121 + "value": 55.811780923940134 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 21827418.0 + "value": 20825641.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138420048.0 + "value": 138416422.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.7939141542342815 + "value": 0.633332620218868 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02979428269827964 + "value": 0.029646370764779623 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -15520,13 +16676,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.57818370070251 + "value": 90.30141482992077 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.906296853619 + "value": 99.56854139650764 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -15538,7 +16694,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -15550,7 +16706,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1888485376.0 + "value": 1958739968.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -15562,13 +16718,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1195376640.0 + "value": 1178599424.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 143654912.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -15580,13 +16736,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 267419648.0 + "value": 266371072.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.91424762616427 + "value": 9.026804751904827 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -15598,13 +16754,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.358640341919674 + "value": 20.32628862982027 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.681134255569869 + "value": 5.592706856886583 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -15616,7 +16772,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.28189097304252 + "value": 20.17027763483842 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -15635,21 +16791,38 @@ "time" ], "times": { - "compilation": 13999.055, - "data": 65423.447, - "framework": 563883.262, - "kernel_overhead": 195217.318, - "profiling_overhead": 54864.868, - "profiling_runs": 248377.629, + "compilation": 19730.875, + "data": 63840.585, + "framework": 560470.9450000001, + "kernel_overhead": 195072.837, + "profiling_overhead": 54116.453, + "profiling_runs": 247441.07, "runtimes": [ - 8326.815 + 8129.952 ], - "search_algorithm": 22.594, - "validation": 12.969 + "search_algorithm": 32.757, + "validation": 20.267 }, - "timestamp": "2026-01-27 09:25:30 UTC" + "timestamp": "2026-03-02 14:27:44 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -15666,61 +16839,61 @@ { "name": "time", "unit": "", - "value": 8165.92 + "value": 8442.944 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.041957402909665 + "value": 6.384312393957191 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2113164.0 + "value": 6240.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1997060.0 + "value": 1997384.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 56.36488299232829 + "value": 56.25754447334731 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 23507300.0 + "value": 22524185.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138418015.0 + "value": 138415408.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.7892185111601604 + "value": 0.6337465186055217 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02961601848687404 + "value": 0.02974046362885063 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -15750,13 +16923,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.06266513968733 + "value": 92.01492774753636 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.57046273944314 + "value": 100.11668829917512 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -15768,7 +16941,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -15780,7 +16953,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1888485376.0 + "value": 1958739968.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -15792,13 +16965,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1195376640.0 + "value": 1178599424.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 143654912.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -15810,13 +16983,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 267419648.0 + "value": 266371072.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.890829760439175 + "value": 9.005868179863894 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -15828,13 +17001,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.30508653281945 + "value": 20.279159764013066 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.666189918704256 + "value": 5.57973951514715 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -15846,7 +17019,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.228560310184555 + "value": 20.12353263358683 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -15865,21 +17038,38 @@ "time" ], "times": { - "compilation": 14282.079, - "data": 65437.866, - "framework": 562632.094, - "kernel_overhead": 194531.89, - "profiling_overhead": 55411.171, - "profiling_runs": 247251.167, + "compilation": 20043.002, + "data": 64608.091, + "framework": 563132.9750000001, + "kernel_overhead": 195601.353, + "profiling_overhead": 54670.324, + "profiling_runs": 248253.207, "runtimes": [ - 8165.92 + 8442.944 ], - "search_algorithm": 23.094, - "validation": 19.183 + "search_algorithm": 34.237, + "validation": 20.391 }, - "timestamp": "2026-01-27 09:25:30 UTC" + "timestamp": "2026-03-02 14:27:44 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -15896,61 +17086,61 @@ { "name": "time", "unit": "", - "value": 8107.392 + "value": 8083.616 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.004361538523964 + "value": 6.47670988882817 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103200.0 + "value": 25528.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1999004.0 + "value": 2007276.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 53.678873878419395 + "value": 53.279410575425324 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 14869319.0 + "value": 12913600.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415527.0 + "value": 138455099.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.7951172067805264 + "value": 0.6389684835840983 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029762467750363134 + "value": 0.030089848611183793 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -15980,13 +17170,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.2805008214013 + "value": 87.0573153251347 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.78719770465203 + "value": 100.37525519010097 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -15998,7 +17188,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -16010,7 +17200,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1888485376.0 + "value": 1958739968.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -16022,13 +17212,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1195376640.0 + "value": 1178599424.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 143654912.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -16040,13 +17230,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 267419648.0 + "value": 266371072.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.915485070868973 + "value": 9.088220788894262 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -16058,13 +17248,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.36117369589923 + "value": 20.464542394435934 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.6818411949250045 + "value": 5.630746894172192 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -16076,7 +17266,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.28443601497339 + "value": 20.30748893394529 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -16095,21 +17285,38 @@ "time" ], "times": { - "compilation": 14206.051, - "data": 67386.837, - "framework": 560991.885, - "kernel_overhead": 192849.082, - "profiling_overhead": 55803.947, - "profiling_runs": 244952.019, + "compilation": 22836.567, + "data": 63281.837, + "framework": 559219.025, + "kernel_overhead": 195251.196, + "profiling_overhead": 53367.409, + "profiling_runs": 247318.583, "runtimes": [ - 8107.392 + 8083.616 ], - "search_algorithm": 21.773, - "validation": 18.124 + "search_algorithm": 33.237, + "validation": 19.548 }, - "timestamp": "2026-01-27 09:25:31 UTC" + "timestamp": "2026-03-02 14:27:44 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -16126,61 +17333,61 @@ { "name": "time", "unit": "", - "value": 10212.992 + "value": 9364.0 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.249364748789285 + "value": 6.336380536467666 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2109072.0 + "value": 22536.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2225976.0 + "value": 2276680.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 79.64526026618651 + "value": 81.75423408396382 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 135593699.0 + "value": 132964803.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138424062.0 + "value": 138426900.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.39969360747014643 + "value": 0.27442612796692684 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01236072655964445 + "value": 0.012773389570065876 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -16210,13 +17417,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 99.1325867899026 + "value": 98.98392110434115 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.97247939758196 + "value": 98.99088416263311 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -16228,7 +17435,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -16240,7 +17447,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1783103488.0 + "value": 1817706496.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -16252,13 +17459,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1161822208.0 + "value": 1145044992.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -16270,13 +17477,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 253313024.0 + "value": 252264448.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.99696324806149 + "value": 7.266784923168747 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -16288,13 +17495,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.051721952227396 + "value": 17.617717739555733 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.618868531737377 + "value": 4.703362877979539 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -16306,7 +17513,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.091353147246224 + "value": 16.556689857643583 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -16325,21 +17532,38 @@ "time" ], "times": { - "compilation": 13933.001, - "data": 64381.824, - "framework": 549595.845, - "kernel_overhead": 184503.571, - "profiling_overhead": 53987.671, - "profiling_runs": 246722.779, + "compilation": 101026.478, + "data": 62237.431, + "framework": 542985.752, + "kernel_overhead": 184728.898, + "profiling_overhead": 52451.488, + "profiling_runs": 243567.935, "runtimes": [ - 10212.992 + 9364.0 ], - "search_algorithm": 35.348, - "validation": 14.827 + "search_algorithm": 33.41, + "validation": 19.369 }, - "timestamp": "2026-01-27 09:25:31 UTC" + "timestamp": "2026-03-02 14:27:45 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -16356,61 +17580,61 @@ { "name": "time", "unit": "", - "value": 9834.016 + "value": 9450.176 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.379811489014262 + "value": 6.211148825520924 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2122720.0 + "value": 2176.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2230260.0 + "value": 2275656.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 79.37164892868776 + "value": 81.65517343856614 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 135749324.0 + "value": 132932783.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425902.0 + "value": 138422298.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3992180994080388 + "value": 0.27569904256097133 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012418207890097567 + "value": 0.012853284092535864 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -16440,13 +17664,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.89669425008164 + "value": 98.8072437073128 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.34791687695493 + "value": 99.5511554326265 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -16458,7 +17682,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -16470,7 +17694,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1783103488.0 + "value": 1817706496.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -16482,13 +17706,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1161822208.0 + "value": 1145044992.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -16500,13 +17724,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 253313024.0 + "value": 252264448.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.003400042648112 + "value": 7.2715368546293915 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -16518,13 +17742,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.066279500970744 + "value": 17.628140163796484 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.622811793536875 + "value": 4.706145329372914 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -16536,7 +17760,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.10510504254778 + "value": 16.566502715796112 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -16555,21 +17779,38 @@ "time" ], "times": { - "compilation": 13895.54, - "data": 65456.328, - "framework": 551580.54, - "kernel_overhead": 184573.557, - "profiling_overhead": 55005.046, - "profiling_runs": 246545.609, + "compilation": 21008.762, + "data": 63786.551, + "framework": 546074.348, + "kernel_overhead": 184373.658, + "profiling_overhead": 53722.034, + "profiling_runs": 244192.105, "runtimes": [ - 9834.016 + 9450.176 ], - "search_algorithm": 23.477, - "validation": 19.737 + "search_algorithm": 31.449, + "validation": 21.397 }, - "timestamp": "2026-01-27 09:25:31 UTC" + "timestamp": "2026-03-02 14:27:45 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -16586,61 +17827,61 @@ { "name": "time", "unit": "", - "value": 9505.984 + "value": 9464.832 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.616288409152348 + "value": 6.031319363749591 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2111168.0 + "value": 18116.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2179060.0 + "value": 2200900.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.78331294724626 + "value": 81.74625845698984 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 135011180.0 + "value": 133035314.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138416810.0 + "value": 138425288.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4081965575175544 + "value": 0.27589867023953535 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012785473038987494 + "value": 0.012816380936884957 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -16670,13 +17911,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.93840071186223 + "value": 99.04167374364971 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.1961867264232 + "value": 99.25333707770244 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -16688,7 +17929,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -16700,7 +17941,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1783103488.0 + "value": 1817706496.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -16712,13 +17953,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1161822208.0 + "value": 1145044992.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -16730,13 +17971,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 253313024.0 + "value": 252264448.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.14856812012327 + "value": 7.271879605390187 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -16748,13 +17989,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.422252375958674 + "value": 17.630270800998638 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.71923559841947 + "value": 4.7067141408427755 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -16766,7 +18007,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.441015018549898 + "value": 16.56848625359018 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -16785,21 +18026,38 @@ "time" ], "times": { - "compilation": 14701.466, - "data": 69260.482, - "framework": 556243.283, - "kernel_overhead": 184748.631, - "profiling_overhead": 57847.055, - "profiling_runs": 244387.115, + "compilation": 21392.577, + "data": 64133.201, + "framework": 547006.1359999999, + "kernel_overhead": 184036.174, + "profiling_overhead": 54181.872, + "profiling_runs": 244654.889, "runtimes": [ - 9505.984 + 9464.832 ], - "search_algorithm": 23.5, - "validation": 22.802 + "search_algorithm": 34.309, + "validation": 26.334 }, - "timestamp": "2026-01-27 09:25:32 UTC" + "timestamp": "2026-03-02 14:27:45 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -16816,61 +18074,61 @@ { "name": "time", "unit": "", - "value": 9701.184 + "value": 9506.784 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.413423315309663 + "value": 6.048709863782219 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2102752.0 + "value": 22892.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2182384.0 + "value": 2201748.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.78778891869305 + "value": 81.78330468492516 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 134927698.0 + "value": 132861882.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138458227.0 + "value": 138427558.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.41065663878111375 + "value": 0.27481009626865704 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012759462125538655 + "value": 0.012800478417880868 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -16900,13 +18158,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.82496708000886 + "value": 98.82360822107498 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.40688119775713 + "value": 98.96884238496331 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -16918,7 +18176,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -16930,7 +18188,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1783103488.0 + "value": 1817706496.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -16942,13 +18200,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1161822208.0 + "value": 1145044992.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -16960,13 +18218,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 253313024.0 + "value": 252264448.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.191295584979837 + "value": 7.284090830425568 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -16978,13 +18236,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.524862208226253 + "value": 17.65901211470774 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.747029936530036 + "value": 4.714387145369363 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -16996,7 +18254,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.537860328820095 + "value": 16.595515433749398 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -17015,21 +18273,38 @@ "time" ], "times": { - "compilation": 14459.096, - "data": 63657.145, - "framework": 550678.758, - "kernel_overhead": 186534.702, - "profiling_overhead": 53250.669, - "profiling_runs": 247236.242, + "compilation": 23306.464, + "data": 63885.66, + "framework": 547191.534, + "kernel_overhead": 184414.324, + "profiling_overhead": 53996.324, + "profiling_runs": 244895.226, "runtimes": [ - 9701.184 + 9506.784 ], - "search_algorithm": 22.237, - "validation": 15.993 + "search_algorithm": 57.969, + "validation": 24.353 }, - "timestamp": "2026-01-27 09:25:32 UTC" + "timestamp": "2026-03-02 14:27:46 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -17046,61 +18321,61 @@ { "name": "time", "unit": "", - "value": 9250.24 + "value": 9561.728 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.991203859541066 + "value": 6.208678205063782 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2115512.0 + "value": 18992.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2170716.0 + "value": 2195120.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 77.88306820295561 + "value": 79.24084075612618 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 115350522.0 + "value": 118650667.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138418054.0 + "value": 138425053.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4201702525875663 + "value": 0.28122076679292357 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.013055948265804996 + "value": 0.013053688371277662 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -17130,13 +18405,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.79038422571556 + "value": 94.59199939189199 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.98995876400714 + "value": 99.13897291746345 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -17148,7 +18423,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -17160,7 +18435,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1783103488.0 + "value": 1817706496.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -17172,13 +18447,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1161822208.0 + "value": 1145044992.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -17190,13 +18465,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 253313024.0 + "value": 252264448.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.389569593545824 + "value": 7.415427416264181 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -17208,13 +18483,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 18.007605607833167 + "value": 17.977426366005474 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.877792583469457 + "value": 4.799393489069089 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -17226,7 +18501,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.993415373739136 + "value": 16.894746266247715 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -17245,21 +18520,38 @@ "time" ], "times": { - "compilation": 13424.02, - "data": 59432.892, - "framework": 537911.733, - "kernel_overhead": 185428.415, - "profiling_overhead": 48800.708, - "profiling_runs": 244249.718, + "compilation": 23702.051, + "data": 63437.459, + "framework": 547096.266, + "kernel_overhead": 185282.208, + "profiling_overhead": 53521.15, + "profiling_runs": 244855.449, "runtimes": [ - 9250.24 + 9561.728 ], - "search_algorithm": 22.752, - "validation": 17.742 + "search_algorithm": 31.338, + "validation": 21.894 }, - "timestamp": "2026-01-27 09:25:32 UTC" + "timestamp": "2026-03-02 14:27:46 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -17276,61 +18568,61 @@ { "name": "time", "unit": "", - "value": 2830.976 + "value": 2522.72 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.248233762290674 + "value": 19.41722347793904 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104636.0 + "value": 7492.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868588.0 + "value": 1872816.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 4.3352401732016235 + "value": 2.4284634417978754 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2140661.0 + "value": 51970.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101267.0 + "value": 2105019.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.18667743673651 + "value": 4.067078425440278 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.17438209518214118 + "value": 0.19053778623406362 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -17360,13 +18652,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.61139511228552 + "value": 98.74619851322049 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95550510565742 + "value": 99.9395668002173 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -17378,7 +18670,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -17390,7 +18682,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2373976064.0 + "value": 2776629248.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -17402,13 +18694,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2042626048.0 + "value": 1642070016.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -17420,13 +18712,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 379191296.0 + "value": 378208256.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.84020128283194 + "value": 45.24830711797154 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -17438,13 +18730,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.54891805033951 + "value": 65.07622534650174 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.413359471766689 + "value": 2.3831625493103665 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -17456,7 +18748,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.11940167471322 + "value": 91.6890246137801 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -17475,21 +18767,38 @@ "time" ], "times": { - "compilation": 14273.068, - "data": 63863.8, - "framework": 1191935.057, - "kernel_overhead": 524477.157, - "profiling_overhead": 53189.967, - "profiling_runs": 550404.133, + "compilation": 88094.606, + "data": 61055.813, + "framework": 1083889.696, + "kernel_overhead": 473577.346, + "profiling_overhead": 51097.097, + "profiling_runs": 498159.44, "runtimes": [ - 2830.976 + 2522.72 ], - "search_algorithm": 22.701, - "validation": 14.72 + "search_algorithm": 30.617, + "validation": 20.072 }, - "timestamp": "2026-01-27 09:25:33 UTC" + "timestamp": "2026-03-02 14:27:47 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -17506,61 +18815,61 @@ { "name": "time", "unit": "", - "value": 2857.6 + "value": 2627.744 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.71802985180458 + "value": 18.47076262104095 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2105328.0 + "value": 3416.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871640.0 + "value": 1871724.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 4.3339261804658635 + "value": 2.4052176429905185 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2140382.0 + "value": 48000.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104297.0 + "value": 2102113.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.186691032671284 + "value": 4.066713349359005 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1743911901794258 + "value": 0.19056278441259075 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -17590,13 +18899,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.40412635703869 + "value": 98.55120653212393 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96257948845677 + "value": 99.95116930420934 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -17608,7 +18917,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -17620,7 +18929,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2373976064.0 + "value": 2776629248.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -17632,13 +18941,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2042626048.0 + "value": 1642070016.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -17650,13 +18959,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 379191296.0 + "value": 378208256.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.8391526792479 + "value": 45.24884835041067 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -17668,13 +18977,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.54780934277954 + "value": 65.07720806633091 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.4133145387942876 + "value": 2.3831985375853604 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -17686,7 +18995,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.11787720782303 + "value": 91.69045091324125 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -17705,21 +19014,38 @@ "time" ], "times": { - "compilation": 14298.905, - "data": 65106.142, - "framework": 1192241.08, - "kernel_overhead": 523736.457, - "profiling_overhead": 53807.672, - "profiling_runs": 549590.809, + "compilation": 20384.524, + "data": 63645.69, + "framework": 1091409.4139999999, + "kernel_overhead": 474465.035, + "profiling_overhead": 53927.087, + "profiling_runs": 499371.602, "runtimes": [ - 2857.6 + 2627.744 ], - "search_algorithm": 32.211, - "validation": 14.693 + "search_algorithm": 26.466, + "validation": 18.94 }, - "timestamp": "2026-01-27 09:25:34 UTC" + "timestamp": "2026-03-02 14:27:47 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -17736,61 +19062,61 @@ { "name": "time", "unit": "", - "value": 2801.344 + "value": 2659.488 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.98817766735647 + "value": 18.77755386862837 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097604.0 + "value": 2948.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837512.0 + "value": 1839788.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 4.376720560372675 + "value": 2.4102156561897363 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2145098.0 + "value": 48822.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099167.0 + "value": 2103396.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.188269466893976 + "value": 4.067060008388745 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.17444185491542594 + "value": 0.1905566967815411 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -17820,13 +19146,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.71261971373654 + "value": 98.7558574857234 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94750210315436 + "value": 99.94967511308295 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -17838,7 +19164,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -17850,7 +19176,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2373976064.0 + "value": 2776629248.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -17862,13 +19188,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2042626048.0 + "value": 1642070016.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -17880,13 +19206,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 379191296.0 + "value": 378208256.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.85614393964615 + "value": 45.24841285466676 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -17898,13 +19224,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.57409495804985 + "value": 65.07610197616224 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.414379824960028 + "value": 2.3831580313535974 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -17916,7 +19242,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.15496882561384 + "value": 91.68884909438201 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -17935,21 +19261,38 @@ "time" ], "times": { - "compilation": 15376.344, - "data": 66867.848, - "framework": 1195523.302, - "kernel_overhead": 523466.175, - "profiling_overhead": 55649.026, - "profiling_runs": 549540.253, + "compilation": 19033.148, + "data": 63297.751, + "framework": 1091123.362, + "kernel_overhead": 474731.913, + "profiling_overhead": 53464.738, + "profiling_runs": 499628.96, "runtimes": [ - 2801.344 + 2659.488 ], - "search_algorithm": 25.482, - "validation": 18.375 + "search_algorithm": 30.298, + "validation": 17.757 }, - "timestamp": "2026-01-27 09:25:34 UTC" + "timestamp": "2026-03-02 14:27:48 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -17966,61 +19309,61 @@ { "name": "time", "unit": "", - "value": 2861.184 + "value": 2677.024 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.43379093273437 + "value": 18.787851813620446 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2105208.0 + "value": 860.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839668.0 + "value": 1839616.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 4.3401955652016495 + "value": 2.408801762142962 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2153262.0 + "value": 46787.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100642.0 + "value": 2103042.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.188551796662393 + "value": 4.0668536857141415 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.17444568103200633 + "value": 0.19056256276871525 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -18050,13 +19393,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.62704268881774 + "value": 98.54585378086554 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9501749815961 + "value": 99.95064959998611 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -18068,7 +19411,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -18080,7 +19423,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2373976064.0 + "value": 2776629248.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -18092,13 +19435,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2042626048.0 + "value": 1642070016.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -18110,13 +19453,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 379191296.0 + "value": 378208256.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.855066987314295 + "value": 45.24949840721867 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -18128,13 +19471,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.57380845328392 + "value": 65.07747075052244 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.414368213682893 + "value": 2.3832081573677653 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -18146,7 +19489,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.15460383150672 + "value": 91.69082102180438 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -18165,21 +19508,38 @@ "time" ], "times": { - "compilation": 14219.159, - "data": 63365.402, - "framework": 1194193.2280000001, - "kernel_overhead": 526143.782, - "profiling_overhead": 53078.85, - "profiling_runs": 551605.194, + "compilation": 17508.109, + "data": 62891.697, + "framework": 1089749.7880000002, + "kernel_overhead": 474498.338, + "profiling_overhead": 53103.986, + "profiling_runs": 499255.767, "runtimes": [ - 2861.184 + 2677.024 ], - "search_algorithm": 22.927, - "validation": 12.258 + "search_algorithm": 43.959, + "validation": 22.564 }, - "timestamp": "2026-01-27 09:25:35 UTC" + "timestamp": "2026-03-02 14:27:49 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -18196,61 +19556,61 @@ { "name": "time", "unit": "", - "value": 2808.704 + "value": 2670.336 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.28551922216729 + "value": 18.81252286924022 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104096.0 + "value": 3024.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841632.0 + "value": 1839628.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 4.320918153739336 + "value": 2.4131726765850634 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2151975.0 + "value": 47381.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101467.0 + "value": 2103412.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.194773628211976 + "value": 4.078397215440323 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1747326592446524 + "value": 0.19110834519660264 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -18280,13 +19640,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.10698799271498 + "value": 98.31901853464124 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96446832419534 + "value": 99.95679435813575 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -18298,7 +19658,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -18310,7 +19670,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2373976064.0 + "value": 2776629248.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -18322,13 +19682,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2042626048.0 + "value": 1642070016.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -18340,13 +19700,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 379191296.0 + "value": 378208256.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.91103495095188 + "value": 45.3758426293719 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -18358,13 +19718,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.663280385535685 + "value": 65.25984442843861 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.4179942734372375 + "value": 2.3898868809242657 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -18376,7 +19736,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.28100885015861 + "value": 91.94779660860698 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -18395,21 +19755,38 @@ "time" ], "times": { - "compilation": 14268.95, - "data": 61963.826, - "framework": 1196010.19, - "kernel_overhead": 528228.426, - "profiling_overhead": 51792.96, - "profiling_runs": 554024.978, + "compilation": 16067.589, + "data": 62094.933, + "framework": 1086389.9849999999, + "kernel_overhead": 473724.098, + "profiling_overhead": 52114.782, + "profiling_runs": 498456.172, "runtimes": [ - 2808.704 + 2670.336 ], - "search_algorithm": 31.324, - "validation": 13.525 + "search_algorithm": 26.455, + "validation": 19.787 }, - "timestamp": "2026-01-27 09:25:36 UTC" + "timestamp": "2026-03-02 14:27:49 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -18426,61 +19803,61 @@ { "name": "time", "unit": "", - "value": 3666.016 + "value": 3213.504 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.265057139211898 + "value": 15.16122796164553 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103612.0 + "value": 9960.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1866412.0 + "value": 1872264.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.322329525460024 + "value": 1.9162748089835022 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2153005.0 + "value": 65510.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100462.0 + "value": 2104996.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7621289763948698 + "value": 1.5934781092340113 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06606553907931477 + "value": 0.07466985115432917 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -18510,13 +19887,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.6743710268328 + "value": 98.69688563333997 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96510093812205 + "value": 99.96559762678942 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -18528,7 +19905,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -18540,7 +19917,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2695888896.0 + "value": 3234856960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -18552,13 +19929,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1763704832.0 + "value": 1294991360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -18570,13 +19947,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 499810304.0 + "value": 500957184.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.045159254851267 + "value": 45.37119267639875 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -18588,13 +19965,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.11648657537932 + "value": 50.99216090152354 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0904619558014044 + "value": 1.0332884167056773 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -18606,7 +19983,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.00472599667323 + "value": 95.16285917962016 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -18625,21 +20002,38 @@ "time" ], "times": { - "compilation": 18493.806, - "data": 61099.101, - "framework": 1899579.83, - "kernel_overhead": 878855.474, - "profiling_overhead": 50990.259, - "profiling_runs": 908634.996, + "compilation": 54340.671, + "data": 64163.176, + "framework": 1842479.9309999999, + "kernel_overhead": 848203.714, + "profiling_overhead": 53723.112, + "profiling_runs": 876389.929, "runtimes": [ - 3666.016 + 3213.504 ], - "search_algorithm": 27.621, - "validation": 14.677 + "search_algorithm": 31.027, + "validation": 18.539 }, - "timestamp": "2026-01-27 09:25:37 UTC" + "timestamp": "2026-03-02 14:27:50 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -18656,61 +20050,61 @@ { "name": "time", "unit": "", - "value": 3671.104 + "value": 3385.12 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.158494074691504 + "value": 14.290686126185923 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2106300.0 + "value": 2372.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871896.0 + "value": 1871688.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3227098294226027 + "value": 1.8945074268046067 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2155322.0 + "value": 57802.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108226.0 + "value": 2099494.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7620899005705497 + "value": 1.5934279485377247 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06606345414124794 + "value": 0.07466641194531343 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -18740,13 +20134,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.61195866505611 + "value": 98.59483671243349 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96876579107679 + "value": 99.96061393102192 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -18758,7 +20152,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -18770,7 +20164,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2695888896.0 + "value": 3234856960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -18782,13 +20176,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1763704832.0 + "value": 1294991360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -18800,13 +20194,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 499810304.0 + "value": 500957184.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.043106304450323 + "value": 45.37153052573322 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -18818,13 +20212,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.11340884346248 + "value": 50.99235443856001 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.090387567261422 + "value": 1.03329233847668 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -18836,7 +20230,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.99902733951626 + "value": 95.16325740578608 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -18855,21 +20249,38 @@ "time" ], "times": { - "compilation": 18846.761, - "data": 60567.184, - "framework": 1899126.4789999998, - "kernel_overhead": 878996.69, - "profiling_overhead": 50530.71, - "profiling_runs": 909031.895, + "compilation": 15998.424, + "data": 64324.166, + "framework": 1840050.808, + "kernel_overhead": 846582.076, + "profiling_overhead": 54017.151, + "profiling_runs": 875127.415, "runtimes": [ - 3671.104 + 3385.12 ], - "search_algorithm": 24.055, - "validation": 15.039 + "search_algorithm": 26.279, + "validation": 18.72 }, - "timestamp": "2026-01-27 09:25:38 UTC" + "timestamp": "2026-03-02 14:27:51 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -18886,61 +20297,61 @@ { "name": "time", "unit": "", - "value": 3717.056 + "value": 3327.68 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.86199681915395 + "value": 14.68314160643388 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097616.0 + "value": 6704.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837764.0 + "value": 1839472.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.308022406421817 + "value": 1.9015923543155782 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2159606.0 + "value": 63061.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099295.0 + "value": 2101422.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7625615794548404 + "value": 1.593475130740971 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0660808658142192 + "value": 0.0746693611157479 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -18970,13 +20381,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.69783736274734 + "value": 98.69175088677898 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96755564871481 + "value": 99.96050831343801 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -18988,7 +20399,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -19000,7 +20411,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2695888896.0 + "value": 3234856960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -19012,13 +20423,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1763704832.0 + "value": 1294991360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -19030,13 +20441,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 499810304.0 + "value": 500957184.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.05162608307581 + "value": 45.373441977810394 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -19048,13 +20459,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.125845183572075 + "value": 50.99442241248047 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.090688152630282 + "value": 1.0333342432216501 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -19066,7 +20477,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.02215245133232 + "value": 95.16708479767712 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -19085,21 +20496,38 @@ "time" ], "times": { - "compilation": 14712.001, - "data": 61656.17, - "framework": 1899959.756, - "kernel_overhead": 878540.952, - "profiling_overhead": 51360.132, - "profiling_runs": 908402.502, + "compilation": 16139.976, + "data": 65555.579, + "framework": 1846845.044, + "kernel_overhead": 848661.875, + "profiling_overhead": 55717.091, + "profiling_runs": 876910.499, "runtimes": [ - 3717.056 + 3327.68 ], - "search_algorithm": 27.327, - "validation": 13.571 + "search_algorithm": 24.704, + "validation": 17.39 }, - "timestamp": "2026-01-27 09:25:39 UTC" + "timestamp": "2026-03-02 14:27:52 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -19116,61 +20544,61 @@ { "name": "time", "unit": "", - "value": 3711.424 + "value": 3351.584 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.927439599079264 + "value": 14.654024901757174 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104380.0 + "value": 5196.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840172.0 + "value": 1837244.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3140920042058473 + "value": 1.8980004428231028 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2165102.0 + "value": 59351.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101521.0 + "value": 2100582.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.762591124062999 + "value": 1.5934513758413567 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06608261423854468 + "value": 0.07467210632218972 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -19200,13 +20628,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.6629372650336 + "value": 98.59558000141246 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97183939784368 + "value": 99.96676276973233 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -19218,7 +20646,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -19230,7 +20658,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2695888896.0 + "value": 3234856960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -19242,13 +20670,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1763704832.0 + "value": 1294991360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -19260,13 +20688,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 499810304.0 + "value": 500957184.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.050960248879893 + "value": 45.37256451933955 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -19278,13 +20706,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.12510548827567 + "value": 50.99310661221352 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0906702742527568 + "value": 1.0333075802767877 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -19296,7 +20724,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.02080593733523 + "value": 95.16466113181113 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -19315,21 +20743,38 @@ "time" ], "times": { - "compilation": 18222.788, - "data": 62183.257, - "framework": 1901114.846, - "kernel_overhead": 878971.023, - "profiling_overhead": 51190.519, - "profiling_runs": 908770.047, + "compilation": 16299.225, + "data": 63476.446, + "framework": 1841583.446, + "kernel_overhead": 848101.809, + "profiling_overhead": 53754.198, + "profiling_runs": 876250.993, "runtimes": [ - 3711.424 + 3351.584 ], - "search_algorithm": 34.527, - "validation": 14.735 + "search_algorithm": 27.131, + "validation": 20.413 }, - "timestamp": "2026-01-27 09:25:40 UTC" + "timestamp": "2026-03-02 14:27:53 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -19346,61 +20791,61 @@ { "name": "time", "unit": "", - "value": 3708.608 + "value": 3288.32 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.87200060735718 + "value": 14.67740098097442 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098084.0 + "value": 212.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837496.0 + "value": 1834496.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.308816262721178 + "value": 1.8888999233601558 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2158284.0 + "value": 53061.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100062.0 + "value": 2098984.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7631040283635486 + "value": 1.5935080642912398 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06610059070409878 + "value": 0.07466766871715136 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -19430,13 +20875,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.4012568088631 + "value": 98.35688676425403 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96104560881587 + "value": 99.95648449631103 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -19448,7 +20893,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -19460,7 +20905,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2695888896.0 + "value": 3234856960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -19472,13 +20917,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1763704832.0 + "value": 1294991360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -19490,13 +20935,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 499810304.0 + "value": 500957184.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.06306455197106 + "value": 45.37425945303741 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -19508,13 +20953,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.14225481119326 + "value": 50.9953193809951 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.091084772047884 + "value": 1.033352419097313 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -19526,7 +20971,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.05274489886828 + "value": 95.16883321224304 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -19545,21 +20990,38 @@ "time" ], "times": { - "compilation": 18400.724, - "data": 60909.469, - "framework": 1894303.9980000001, - "kernel_overhead": 876433.859, - "profiling_overhead": 50456.812, - "profiling_runs": 906503.858, + "compilation": 16192.956, + "data": 65112.363, + "framework": 1845724.784, + "kernel_overhead": 848646.651, + "profiling_overhead": 55420.697, + "profiling_runs": 876545.073, "runtimes": [ - 3708.608 + 3288.32 ], - "search_algorithm": 26.48, - "validation": 17.314 + "search_algorithm": 26.489, + "validation": 20.057 }, - "timestamp": "2026-01-27 09:25:41 UTC" + "timestamp": "2026-03-02 14:27:54 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 56 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -19576,61 +21038,61 @@ { "name": "time", "unit": "", - "value": 6304.608 + "value": 5388.064 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.557134335989094 + "value": 9.082009454432734 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2113556.0 + "value": 8956.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1865836.0 + "value": 1866152.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.966928250302347 + "value": 1.160726919686435 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2206771.0 + "value": 97209.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102215.0 + "value": 2100912.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6214888179871144 + "value": 0.47672694971304846 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019417370177624693 + "value": 0.022342309712631857 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -19660,13 +21122,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.8036460324469 + "value": 73.90037931878727 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98478721814315 + "value": 99.97638788962449 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -19678,7 +21140,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -19690,7 +21152,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4333240320.0 + "value": 4706533376.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -19702,13 +21164,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1492647936.0 + "value": 1121452032.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -19720,13 +21182,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 853852160.0 + "value": 852869120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.010784063511068 + "value": 42.991188755223 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -19738,13 +21200,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.515216451221175 + "value": 30.51190469892855 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.42401041932494793 + "value": 0.3687351764152742 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -19756,7 +21218,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.34126601983543 + "value": 96.94244511462298 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -19775,21 +21237,38 @@ "time" ], "times": { - "compilation": 16326.723, - "data": 62618.484, - "framework": 3040494.232, - "kernel_overhead": 1441899.219, - "profiling_overhead": 51554.365, - "profiling_runs": 1484422.164, + "compilation": 64159.091, + "data": 64952.944, + "framework": 3081225.75, + "kernel_overhead": 1461266.823, + "profiling_overhead": 55263.878, + "profiling_runs": 1499742.105, "runtimes": [ - 6304.608 + 5388.064 ], - "search_algorithm": 31.891, - "validation": 18.106 + "search_algorithm": 24.34, + "validation": 18.868 }, - "timestamp": "2026-01-27 09:25:42 UTC" + "timestamp": "2026-03-02 14:27:56 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 56 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -19806,61 +21285,61 @@ { "name": "time", "unit": "", - "value": 6214.304 + "value": 5482.912 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.713102288454728 + "value": 9.032994877199862 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2112876.0 + "value": 16184.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1875036.0 + "value": 1870680.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9836119355036776 + "value": 1.1624590262703274 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2209935.0 + "value": 106743.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2110584.0 + "value": 2106197.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6214214825981468 + "value": 0.4767120449546747 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01941754016363639 + "value": 0.02234145665642865 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -19890,13 +21369,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.76530176446796 + "value": 73.8646226800163 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97475941007212 + "value": 99.97513244785675 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -19908,7 +21387,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -19920,7 +21399,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4333240320.0 + "value": 4706533376.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -19932,13 +21411,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1492647936.0 + "value": 1121452032.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -19950,13 +21429,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 853852160.0 + "value": 852869120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.013906053133496 + "value": 42.99005497644099 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -19968,13 +21447,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.51810816368978 + "value": 30.51112285763003 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.424056661309004 + "value": 0.36872572789372227 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -19986,7 +21465,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.35046416049614 + "value": 96.93996104789827 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20005,21 +21484,38 @@ "time" ], "times": { - "compilation": 17162.706, - "data": 62594.818, - "framework": 3045226.398, - "kernel_overhead": 1443823.312, - "profiling_overhead": 52503.207, - "profiling_runs": 1486305.061, + "compilation": 15152.205, + "data": 66073.72, + "framework": 3090489.722, + "kernel_overhead": 1464608.749, + "profiling_overhead": 56390.06, + "profiling_runs": 1503417.193, "runtimes": [ - 6214.304 + 5482.912 ], - "search_algorithm": 26.385, - "validation": 18.569 + "search_algorithm": 28.806, + "validation": 18.404 }, - "timestamp": "2026-01-27 09:25:44 UTC" + "timestamp": "2026-03-02 14:27:57 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 56 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -20036,61 +21532,61 @@ { "name": "time", "unit": "", - "value": 6226.144 + "value": 5449.632 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.479711158260198 + "value": 8.83621420294143 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2099136.0 + "value": 820.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837896.0 + "value": 1834104.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9746111012742023 + "value": 1.1567665125316449 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2200854.0 + "value": 90260.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099054.0 + "value": 2099167.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6215698863351113 + "value": 0.4767141688854015 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019420850881718958 + "value": 0.02234260626647558 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -20120,13 +21616,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.81699622124492 + "value": 73.8986101026343 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97877661978637 + "value": 99.97699396124155 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -20138,7 +21634,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -20150,7 +21646,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4333240320.0 + "value": 4706533376.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -20162,13 +21658,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1492647936.0 + "value": 1121452032.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -20180,13 +21676,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 853852160.0 + "value": 852869120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.01765199415382 + "value": 42.99147512222771 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -20198,13 +21694,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.521563842839917 + "value": 30.51212472040053 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4241119218032262 + "value": 0.36873783536616855 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -20216,7 +21712,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.36145620180258 + "value": 96.94314416699883 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20235,21 +21731,38 @@ "time" ], "times": { - "compilation": 15503.054, - "data": 62661.1, - "framework": 3040916.04, - "kernel_overhead": 1441631.324, - "profiling_overhead": 52709.826, - "profiling_runs": 1483913.79, + "compilation": 17309.392, + "data": 61102.815, + "framework": 3078264.67, + "kernel_overhead": 1463694.951, + "profiling_overhead": 51257.143, + "profiling_runs": 1502209.761, "runtimes": [ - 6226.144 + 5449.632 ], - "search_algorithm": 32.112, - "validation": 14.497 + "search_algorithm": 25.679, + "validation": 18.053 }, - "timestamp": "2026-01-27 09:25:45 UTC" + "timestamp": "2026-03-02 14:27:59 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 56 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -20266,61 +21779,61 @@ { "name": "time", "unit": "", - "value": 6259.232 + "value": 5447.232 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.42779001448874 + "value": 8.854275817009578 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2109036.0 + "value": 4500.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838512.0 + "value": 1834220.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.968960238684181 + "value": 1.1581313212291875 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2210881.0 + "value": 91995.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101773.0 + "value": 2100070.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6215651677387657 + "value": 0.4767037878119691 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019420915953489055 + "value": 0.022342061913402694 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -20350,13 +21863,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.7987858252636 + "value": 73.86589391521635 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9778787382853 + "value": 99.97807964723252 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -20368,7 +21881,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -20380,7 +21893,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4333240320.0 + "value": 4706533376.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -20392,13 +21905,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1492647936.0 + "value": 1121452032.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -20410,13 +21923,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 853852160.0 + "value": 852869120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.0180499372562 + "value": 42.989980957015746 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -20428,13 +21941,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.521890892059645 + "value": 30.511049995557897 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4241171517162858 + "value": 0.36872484735842675 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -20446,7 +21959,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.36249650050402 + "value": 96.93972955047134 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20465,21 +21978,38 @@ "time" ], "times": { - "compilation": 14774.667, - "data": 61352.466, - "framework": 3038709.1720000003, - "kernel_overhead": 1441874.415, - "profiling_overhead": 51082.52, - "profiling_runs": 1484399.771, + "compilation": 15558.579, + "data": 65500.37, + "framework": 3083473.1850000005, + "kernel_overhead": 1461866.019, + "profiling_overhead": 55833.461, + "profiling_runs": 1500273.335, "runtimes": [ - 6259.232 + 5447.232 ], - "search_algorithm": 27.751, - "validation": 14.815 + "search_algorithm": 37.465, + "validation": 18.494 }, - "timestamp": "2026-01-27 09:25:47 UTC" + "timestamp": "2026-03-02 14:28:0 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 56 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -20496,61 +22026,61 @@ { "name": "time", "unit": "", - "value": 6158.56 + "value": 5447.36 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.591078595859006 + "value": 8.85477705336656 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098012.0 + "value": 7280.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838528.0 + "value": 1830252.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9752618106236735 + "value": 1.1611545044615277 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2199070.0 + "value": 95418.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099902.0 + "value": 2100808.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6222538154222149 + "value": 0.47707855122532533 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019441488807948784 + "value": 0.022357935407691713 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -20580,13 +22110,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.67446551041674 + "value": 65.75410187826006 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96877553000681 + "value": 99.9667323819178 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -20598,7 +22128,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -20610,7 +22140,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4333240320.0 + "value": 4706533376.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -20622,13 +22152,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1492647936.0 + "value": 1121452032.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -20640,13 +22170,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 853852160.0 + "value": 852869120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.05142398482834 + "value": 43.02540643810495 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -20658,13 +22188,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.552403566403466 + "value": 30.536193140745993 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.42460508632798516 + "value": 0.3690287012858708 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -20676,7 +22206,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.45952306140065 + "value": 97.01957978720422 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20695,21 +22225,38 @@ "time" ], "times": { - "compilation": 18554.509, - "data": 64526.241, - "framework": 3046114.266, - "kernel_overhead": 1442807.746, - "profiling_overhead": 53504.151, - "profiling_runs": 1485276.128, + "compilation": 16034.829, + "data": 62159.137, + "framework": 3085063.4860000005, + "kernel_overhead": 1465950.479, + "profiling_overhead": 52353.403, + "profiling_runs": 1504600.467, "runtimes": [ - 6158.56 + 5447.36 ], - "search_algorithm": 24.601, - "validation": 21.77 + "search_algorithm": 26.049, + "validation": 14.717 }, - "timestamp": "2026-01-27 09:25:49 UTC" + "timestamp": "2026-03-02 14:28:2 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -20726,61 +22273,61 @@ { "name": "time", "unit": "", - "value": 3352.512 + "value": 3203.008 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 31.6936574528572 + "value": 15.169488465189222 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2099032.0 + "value": 9020.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868020.0 + "value": 1872948.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.718127478726528 + "value": 1.9110962585700848 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2142145.0 + "value": 64810.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099918.0 + "value": 2102807.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9773097848940935 + "value": 1.5875502207102619 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07413490126718918 + "value": 0.07437946894221355 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -20810,13 +22357,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.6127517884258 + "value": 98.7338885235761 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97147656958843 + "value": 99.9458710875116 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -20828,7 +22375,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -20840,7 +22387,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1824522240.0 + "value": 2295332864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -20852,13 +22399,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1898971136.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -20870,13 +22417,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 472612864.0 + "value": 471597056.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.60965139703659 + "value": 42.028436776209475 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -20888,13 +22435,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.62386559480905 + "value": 50.803883720977815 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.223574876437035 + "value": 1.0294732296975484 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -20906,7 +22453,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.13005208715167 + "value": 89.25481809060022 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20925,21 +22472,38 @@ "time" ], "times": { - "compilation": 14930.161, - "data": 63105.907, - "framework": 1837364.4789999998, - "kernel_overhead": 846559.989, - "profiling_overhead": 53147.856, - "profiling_runs": 874550.727, + "compilation": 75464.487, + "data": 60041.715, + "framework": 1666980.987, + "kernel_overhead": 764381.548, + "profiling_overhead": 50320.342, + "profiling_runs": 792237.382, "runtimes": [ - 3352.512 + 3203.008 ], - "search_algorithm": 26.164, - "validation": 14.244 + "search_algorithm": 27.797, + "validation": 18.135 }, - "timestamp": "2026-01-27 09:25:50 UTC" + "timestamp": "2026-03-02 14:28:3 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -20956,61 +22520,61 @@ { "name": "time", "unit": "", - "value": 3454.592 + "value": 3377.12 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 30.988912277022035 + "value": 14.785001462017785 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2100020.0 + "value": 6520.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871400.0 + "value": 1870404.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.6598965531556136 + "value": 1.8798335371609292 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2145650.0 + "value": 59602.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100005.0 + "value": 2100189.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9771926531315134 + "value": 1.5874844139556488 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0741296999484567 + "value": 0.07439232945340526 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -21040,13 +22604,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.52416352289737 + "value": 98.68824397372421 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96670388484752 + "value": 99.96836165454401 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -21058,7 +22622,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -21070,7 +22634,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1824522240.0 + "value": 2295332864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -21082,13 +22646,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1898971136.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -21100,13 +22664,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 472612864.0 + "value": 471597056.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.608466443864415 + "value": 42.02633914578213 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -21118,13 +22682,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.62273056747618 + "value": 50.801236244143496 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2235474429150737 + "value": 1.0294195820956813 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -21136,7 +22700,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.1280869113384 + "value": 89.25020244902174 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -21155,21 +22719,38 @@ "time" ], "times": { - "compilation": 15177.486, - "data": 62440.539, - "framework": 1836270.649, - "kernel_overhead": 846535.423, - "profiling_overhead": 52259.061, - "profiling_runs": 875035.626, + "compilation": 16453.457, + "data": 65358.592, + "framework": 1683321.605, + "kernel_overhead": 767100.047, + "profiling_overhead": 55410.052, + "profiling_runs": 795452.914, "runtimes": [ - 3454.592 + 3377.12 ], - "search_algorithm": 26.978, - "validation": 17.404 + "search_algorithm": 25.974, + "validation": 17.577 }, - "timestamp": "2026-01-27 09:25:51 UTC" + "timestamp": "2026-03-02 14:28:4 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -21186,61 +22767,61 @@ { "name": "time", "unit": "", - "value": 3391.776 + "value": 3360.128 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 30.925274907526536 + "value": 14.659424001460536 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2100468.0 + "value": 11280.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837700.0 + "value": 1838744.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.6924934678009502 + "value": 1.900716269863505 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2156064.0 + "value": 69607.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099956.0 + "value": 2101102.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9782882591877593 + "value": 1.5875443199915216 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07416164389712679 + "value": 0.07439477057071711 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -21270,13 +22851,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.66213612815854 + "value": 98.72629969824082 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96414170148827 + "value": 99.96751226290753 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -21288,7 +22869,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -21300,7 +22881,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1824522240.0 + "value": 2295332864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -21312,13 +22893,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1898971136.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -21330,13 +22911,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 472612864.0 + "value": 471597056.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.62596041333609 + "value": 42.02814664815189 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -21348,13 +22929,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.64584297132833 + "value": 50.80333489681836 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2241060679105236 + "value": 1.0294621085048643 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -21366,7 +22947,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.16874467376196 + "value": 89.25385559202846 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -21385,21 +22966,38 @@ "time" ], "times": { - "compilation": 15273.741, - "data": 60389.35, - "framework": 1827119.264, - "kernel_overhead": 844111.384, - "profiling_overhead": 50346.074, - "profiling_runs": 872272.456, + "compilation": 17358.989, + "data": 64591.7, + "framework": 1678051.1979999999, + "kernel_overhead": 765160.363, + "profiling_overhead": 54832.109, + "profiling_runs": 793467.026, "runtimes": [ - 3391.776 + 3360.128 ], - "search_algorithm": 34.912, - "validation": 13.158 + "search_algorithm": 24.154, + "validation": 20.389 }, - "timestamp": "2026-01-27 09:25:52 UTC" + "timestamp": "2026-03-02 14:28:5 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -21416,61 +23014,61 @@ { "name": "time", "unit": "", - "value": 3310.432 + "value": 3319.872 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 31.284769542135344 + "value": 14.693710720698661 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098040.0 + "value": 5664.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839376.0 + "value": 1837816.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.713781321740659 + "value": 1.8900411431436674 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2155531.0 + "value": 59762.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103083.0 + "value": 2101478.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9782273495500273 + "value": 1.587526558011264 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07416024745188818 + "value": 0.07439480660316873 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -21500,13 +23098,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.61443842799194 + "value": 98.68385884476073 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96164909444593 + "value": 99.96916808861167 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -21518,7 +23116,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -21530,7 +23128,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1824522240.0 + "value": 2295332864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -21542,13 +23140,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1898971136.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -21560,13 +23158,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 472612864.0 + "value": 471597056.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.62609995035257 + "value": 42.02746008880738 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -21578,13 +23176,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.64615218514697 + "value": 50.80251802843808 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2241135415843625 + "value": 1.0294455557520412 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -21596,7 +23194,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.16932380140665 + "value": 89.25245435303523 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -21615,21 +23213,38 @@ "time" ], "times": { - "compilation": 15418.354, - "data": 62444.449, - "framework": 1837252.312, - "kernel_overhead": 847358.958, - "profiling_overhead": 52063.188, - "profiling_runs": 875385.717, + "compilation": 17994.515, + "data": 66633.242, + "framework": 1684941.884, + "kernel_overhead": 766842.209, + "profiling_overhead": 56452.674, + "profiling_runs": 795013.759, "runtimes": [ - 3310.432 + 3319.872 ], - "search_algorithm": 26.347, - "validation": 12.992 + "search_algorithm": 33.808, + "validation": 20.738 }, - "timestamp": "2026-01-27 09:25:53 UTC" + "timestamp": "2026-03-02 14:28:6 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -21646,61 +23261,61 @@ { "name": "time", "unit": "", - "value": 3388.544 + "value": 3318.08 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 31.14291326448118 + "value": 14.815897435897435 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2105564.0 + "value": 8176.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839112.0 + "value": 1840848.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.715189142128396 + "value": 1.9025900451083488 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2158951.0 + "value": 62722.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100527.0 + "value": 2101382.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9889150066673613 + "value": 1.596029895118679 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2621440.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07456076319931206 + "value": 0.07478838411260522 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -21730,13 +23345,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.37760385932948 + "value": 98.52806345449801 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95829185253764 + "value": 99.95470636855835 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -21748,7 +23363,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -21760,7 +23375,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1824522240.0 + "value": 2295332864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -21772,13 +23387,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 100663296.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1898971136.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -21790,13 +23405,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 472612864.0 + "value": 471597056.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.83017846478061 + "value": 42.255780963791054 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -21808,13 +23423,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.92138604417822 + "value": 51.07867227310315 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2307659224545027 + "value": 1.0350414547528228 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -21826,7 +23441,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.65389274436443 + "value": 89.73765883594822 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -21845,21 +23460,38 @@ "time" ], "times": { - "compilation": 15701.298, - "data": 61357.454, - "framework": 1833858.245, - "kernel_overhead": 846653.398, - "profiling_overhead": 51111.98, - "profiling_runs": 874735.413, + "compilation": 17384.471, + "data": 62550.66, + "framework": 1671774.809, + "kernel_overhead": 764197.902, + "profiling_overhead": 52661.114, + "profiling_runs": 792365.133, "runtimes": [ - 3388.544 + 3318.08 ], - "search_algorithm": 38.549, - "validation": 17.811 + "search_algorithm": 27.633, + "validation": 18.324 }, - "timestamp": "2026-01-27 09:25:54 UTC" + "timestamp": "2026-03-02 14:28:7 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -21876,61 +23508,61 @@ { "name": "time", "unit": "", - "value": 6701.28 + "value": 6073.984 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.5538044403497 + "value": 8.041682399229515 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2107732.0 + "value": 8656.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868348.0 + "value": 1872372.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8498570383041355 + "value": 1.0355783801930365 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2208852.0 + "value": 112644.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101491.0 + "value": 2108633.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.5843098677777812 + "value": 0.4193345887558227 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01825583990769391 + "value": 0.019651220894467667 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -21960,13 +23592,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.94893309030182 + "value": 82.00619466328523 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97703934059217 + "value": 99.9707784442227 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -21978,7 +23610,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -21990,7 +23622,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6415712256.0 + "value": 6215958528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -22002,13 +23634,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1155006464.0 + "value": 1355284480.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -22020,13 +23652,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 937738240.0 + "value": 936706048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 35.073083052459225 + "value": 42.84554831921402 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -22038,13 +23670,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.93103108310527 + "value": 26.838309499493267 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.39867737693930544 + "value": 0.32433992192991135 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -22056,7 +23688,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.09316291017713 + "value": 93.6528119236993 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -22075,21 +23707,38 @@ "time" ], "times": { - "compilation": 14840.152, - "data": 60953.569, - "framework": 3524944.984, - "kernel_overhead": 1684483.366, - "profiling_overhead": 50478.425, - "profiling_runs": 1729029.624, + "compilation": 86913.895, + "data": 60811.894, + "framework": 3576535.886, + "kernel_overhead": 1711699.383, + "profiling_overhead": 50302.698, + "profiling_runs": 1753721.911, "runtimes": [ - 6701.28 + 6073.984 ], - "search_algorithm": 24.908, - "validation": 15.871 + "search_algorithm": 34.206, + "validation": 16.313 }, - "timestamp": "2026-01-27 09:25:55 UTC" + "timestamp": "2026-03-02 14:28:8 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -22106,61 +23755,61 @@ { "name": "time", "unit": "", - "value": 6760.384 + "value": 6226.496 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.579809995289967 + "value": 7.933963395571462 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2107272.0 + "value": 14028.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872664.0 + "value": 1870056.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8634567454569144 + "value": 1.0239426465834878 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2211169.0 + "value": 114453.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108796.0 + "value": 2101756.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.5842987931919865 + "value": 0.41932558369979606 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.018254982348911722 + "value": 0.01965205122122913 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -22190,13 +23839,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.91233585656829 + "value": 81.97948506332897 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97758925338934 + "value": 99.97705861313837 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -22208,7 +23857,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -22220,7 +23869,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6415712256.0 + "value": 6215958528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -22232,13 +23881,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1155006464.0 + "value": 1355284480.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -22250,13 +23899,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 937738240.0 + "value": 936706048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 35.071285141369735 + "value": 42.84463688285275 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -22268,13 +23917,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.92972283739663 + "value": 26.83775755450373 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3986564565062205 + "value": 0.32433325169627314 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -22286,7 +23935,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.08859273190212 + "value": 93.65088590073955 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -22305,21 +23954,38 @@ "time" ], "times": { - "compilation": 15309.865, - "data": 60301.286, - "framework": 3525717.507, - "kernel_overhead": 1685225.964, - "profiling_overhead": 50331.175, - "profiling_runs": 1729859.082, + "compilation": 15752.352, + "data": 61644.977, + "framework": 3581174.9809999997, + "kernel_overhead": 1712686.358, + "profiling_overhead": 51568.555, + "profiling_runs": 1755275.091, "runtimes": [ - 6760.384 + 6226.496 ], - "search_algorithm": 25.091, - "validation": 15.576 + "search_algorithm": 24.984, + "validation": 16.516 }, - "timestamp": "2026-01-27 09:25:57 UTC" + "timestamp": "2026-03-02 14:28:10 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -22336,61 +24002,61 @@ { "name": "time", "unit": "", - "value": 6660.224 + "value": 6361.248 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.508509475402239 + "value": 7.779906156682226 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2102608.0 + "value": 10036.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838072.0 + "value": 1837444.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8614686532081357 + "value": 1.0231079922717738 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2210993.0 + "value": 111509.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100370.0 + "value": 2102889.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.5843982581033305 + "value": 0.4193307938405543 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.018259440798087348 + "value": 0.019652322145445034 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -22420,13 +24086,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.9615626153873 + "value": 82.00532514142054 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9761945029573 + "value": 99.97792432327034 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -22438,7 +24104,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -22450,7 +24116,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6415712256.0 + "value": 6215958528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -22462,13 +24128,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1155006464.0 + "value": 1355284480.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -22480,13 +24146,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 937738240.0 + "value": 936706048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 35.08005468944198 + "value": 42.84498441698085 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -22498,13 +24164,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.93615934633101 + "value": 26.837895149555212 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3987593840782913 + "value": 0.32433491452709545 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -22516,7 +24182,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.11107779958049 + "value": 93.65136604139364 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -22535,21 +24201,38 @@ "time" ], "times": { - "compilation": 14621.572, - "data": 59924.193, - "framework": 3527953.767, - "kernel_overhead": 1686928.427, - "profiling_overhead": 49845.177, - "profiling_runs": 1731255.97, + "compilation": 16184.805, + "data": 65027.935, + "framework": 3586406.308, + "kernel_overhead": 1711798.048, + "profiling_overhead": 55362.856, + "profiling_runs": 1754217.469, "runtimes": [ - 6660.224 + 6361.248 ], - "search_algorithm": 25.681, - "validation": 13.485 + "search_algorithm": 25.541, + "validation": 20.867 }, - "timestamp": "2026-01-27 09:25:59 UTC" + "timestamp": "2026-03-02 14:28:12 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -22566,61 +24249,61 @@ { "name": "time", "unit": "", - "value": 6666.88 + "value": 6228.736 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.508967116399733 + "value": 7.7644951166400755 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2118100.0 + "value": 6508.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1845724.0 + "value": 1837372.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8607607576535068 + "value": 1.0202195732973114 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2229109.0 + "value": 106725.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105787.0 + "value": 2101487.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.5844020796763401 + "value": 0.4193147982646368 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.018259502118298392 + "value": 0.019651652077351384 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -22650,13 +24333,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.93438412246586 + "value": 81.97554863038033 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97675927435003 + "value": 99.97519754564172 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -22668,7 +24351,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -22680,7 +24363,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6415712256.0 + "value": 6215958528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -22692,13 +24375,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1155006464.0 + "value": 1355284480.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -22710,13 +24393,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 937738240.0 + "value": 936706048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 35.07999848155109 + "value": 42.844807981157984 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -22728,13 +24411,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.936102223288916 + "value": 26.837712047558494 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3987584706116758 + "value": 0.32433270174661755 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -22746,7 +24429,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.11087824800981 + "value": 93.6507271033539 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -22765,21 +24448,38 @@ "time" ], "times": { - "compilation": 15145.04, - "data": 60225.47, - "framework": 3534012.551, - "kernel_overhead": 1689451.138, - "profiling_overhead": 50109.573, - "profiling_runs": 1734226.37, + "compilation": 15314.686, + "data": 60989.447, + "framework": 3582716.3549999995, + "kernel_overhead": 1714069.974, + "profiling_overhead": 51310.533, + "profiling_runs": 1756346.401, "runtimes": [ - 6666.88 + 6228.736 ], - "search_algorithm": 27.845, - "validation": 14.899 + "search_algorithm": 24.026, + "validation": 16.195 }, - "timestamp": "2026-01-27 09:26:1 UTC" + "timestamp": "2026-03-02 14:28:14 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -22796,61 +24496,61 @@ { "name": "time", "unit": "", - "value": 6854.56 + "value": 6185.28 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.6044681785152 + "value": 7.767700052130848 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2115084.0 + "value": 440.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1843068.0 + "value": 1830520.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.874668476125645 + "value": 1.023956839868056 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2224703.0 + "value": 100706.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109479.0 + "value": 2099184.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.5849343415418866 + "value": 0.42001284357053886 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.018275179949843196 + "value": 0.01968335807315302 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -22880,13 +24580,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.92349997665701 + "value": 81.95574964112167 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97894944812764 + "value": 99.96870533481155 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -22898,7 +24598,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -22910,7 +24610,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6415712256.0 + "value": 6215958528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -22922,13 +24622,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1155006464.0 + "value": 1355284480.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -22940,13 +24640,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 937738240.0 + "value": 936706048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 35.10944568816628 + "value": 42.91654511770431 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -22958,13 +24658,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.956965937246302 + "value": 26.882757758245457 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.39909210666250794 + "value": 0.3248770774006714 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -22976,7 +24676,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.1837754364612 + "value": 93.8079292226496 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -22995,21 +24695,38 @@ "time" ], "times": { - "compilation": 20207.761, - "data": 62147.989, - "framework": 3529478.113, - "kernel_overhead": 1685274.412, - "profiling_overhead": 52019.95, - "profiling_runs": 1730035.762, + "compilation": 15904.724, + "data": 61010.68, + "framework": 3582105.234, + "kernel_overhead": 1713822.314, + "profiling_overhead": 51311.063, + "profiling_runs": 1755961.177, "runtimes": [ - 6854.56 + 6185.28 ], - "search_algorithm": 26.676, - "validation": 25.506 + "search_algorithm": 25.589, + "validation": 17.55 }, - "timestamp": "2026-01-27 09:26:3 UTC" + "timestamp": "2026-03-02 14:28:16 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", @@ -23026,61 +24743,61 @@ { "name": "time", "unit": "", - "value": 6167.584 + "value": 5805.728 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.884196079605456 + "value": 8.425012899157256 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103580.0 + "value": 10032.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1866632.0 + "value": 1871024.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.005123991877393 + "value": 1.07813930981866 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2197182.0 + "value": 104952.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100460.0 + "value": 2102872.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6352051599499073 + "value": 0.4408702389081566 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01984664205088687 + "value": 0.020660415702763335 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -23110,13 +24827,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.97671475196458 + "value": 82.01765084994538 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96966312844712 + "value": 99.9655530484588 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -23128,7 +24845,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -23140,7 +24857,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5811732480.0 + "value": 5813305344.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -23152,13 +24869,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 987234304.0 + "value": 986185728.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -23170,13 +24887,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 898924544.0 + "value": 897892352.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.29648965208124 + "value": 43.94614183416958 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -23188,13 +24905,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.10550490901888 + "value": 28.21807450595713 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.43344984656756264 + "value": 0.34101432813595656 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -23206,7 +24923,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.87767965534688 + "value": 94.49935943287379 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -23225,21 +24942,38 @@ "time" ], "times": { - "compilation": 16247.971, - "data": 60003.504, - "framework": 3459799.704, - "kernel_overhead": 1653925.048, - "profiling_overhead": 49848.984, - "profiling_runs": 1696022.168, + "compilation": 73779.137, + "data": 64356.326, + "framework": 3490051.291, + "kernel_overhead": 1665283.676, + "profiling_overhead": 54349.525, + "profiling_runs": 1706061.764, "runtimes": [ - 6167.584 + 5805.728 ], - "search_algorithm": 28.137, - "validation": 13.909 + "search_algorithm": 22.747, + "validation": 18.805 }, - "timestamp": "2026-01-27 09:26:4 UTC" + "timestamp": "2026-03-02 14:28:18 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", @@ -23256,61 +24990,61 @@ { "name": "time", "unit": "", - "value": 6131.936 + "value": 5896.256 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.973215361280854 + "value": 8.334792862653446 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098148.0 + "value": 9612.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869576.0 + "value": 1871080.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.014293052831512 + "value": 1.0815447367043287 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2192694.0 + "value": 107987.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099285.0 + "value": 2109131.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6351765967042945 + "value": 0.4408711471858047 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01984774607910102 + "value": 0.02066219718858926 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -23340,13 +25074,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.93257044255819 + "value": 81.97649257552028 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97355753824115 + "value": 99.97360884796042 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -23358,7 +25092,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -23370,7 +25104,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5811732480.0 + "value": 5813305344.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -23382,13 +25116,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 987234304.0 + "value": 986185728.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -23400,13 +25134,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 898924544.0 + "value": 897892352.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.297251209720734 + "value": 43.946291752640626 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -23418,13 +25152,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.10595679559264 + "value": 28.218233678440164 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4334570727810834 + "value": 0.3410162517291963 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -23436,7 +25170,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.87919410715813 + "value": 94.49989006696893 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -23455,21 +25189,38 @@ "time" ], "times": { - "compilation": 17582.338, - "data": 62548.75, - "framework": 3467482.934, - "kernel_overhead": 1655460.517, - "profiling_overhead": 52566.669, - "profiling_runs": 1696906.998, + "compilation": 15551.976, + "data": 61681.883, + "framework": 3489265.607, + "kernel_overhead": 1667470.908, + "profiling_overhead": 51777.778, + "profiling_runs": 1708335.038, "runtimes": [ - 6131.936 + 5896.256 ], - "search_algorithm": 25.346, - "validation": 17.49 + "search_algorithm": 26.216, + "validation": 24.103 }, - "timestamp": "2026-01-27 09:26:6 UTC" + "timestamp": "2026-03-02 14:28:20 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", @@ -23486,61 +25237,61 @@ { "name": "time", "unit": "", - "value": 6127.68 + "value": 5891.712 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.836506971598812 + "value": 8.20072907403607 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097616.0 + "value": 6588.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837504.0 + "value": 1834868.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.017043860853979 + "value": 1.0765140230514716 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2197977.0 + "value": 102742.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099175.0 + "value": 2100462.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6353684699560179 + "value": 0.4408777573354544 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01985092485310267 + "value": 0.02066237299148047 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -23570,13 +25321,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.98752936183982 + "value": 82.01044763138049 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97313651139189 + "value": 99.9753148287838 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -23588,7 +25339,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -23600,7 +25351,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5811732480.0 + "value": 5813305344.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -23612,13 +25363,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 987234304.0 + "value": 986185728.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -23630,13 +25381,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 898924544.0 + "value": 897892352.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.303758370448634 + "value": 43.94589399996889 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -23648,13 +25399,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.110412201929663 + "value": 28.21799225073554 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.433528320123631 + "value": 0.3410133340848167 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -23666,7 +25417,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.89413101207882 + "value": 94.49908186639601 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -23685,21 +25436,38 @@ "time" ], "times": { - "compilation": 15873.069, - "data": 60959.931, - "framework": 3463857.324, - "kernel_overhead": 1655151.421, - "profiling_overhead": 50763.829, - "profiling_runs": 1696982.143, + "compilation": 15628.28, + "data": 60607.116, + "framework": 3489120.392, + "kernel_overhead": 1668402.632, + "profiling_overhead": 50940.681, + "profiling_runs": 1709169.963, "runtimes": [ - 6127.68 + 5891.712 ], - "search_algorithm": 23.922, - "validation": 20.12 + "search_algorithm": 24.941, + "validation": 17.959 }, - "timestamp": "2026-01-27 09:26:8 UTC" + "timestamp": "2026-03-02 14:28:21 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", @@ -23716,61 +25484,61 @@ { "name": "time", "unit": "", - "value": 6148.032 + "value": 5940.928 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.81448862085903 + "value": 8.186546045165684 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103076.0 + "value": 8192.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840500.0 + "value": 1838588.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.0234475421224705 + "value": 1.0813241380591092 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2207337.0 + "value": 107362.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108751.0 + "value": 2109209.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.635351905476862 + "value": 0.44087576277698953 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019851266710491956 + "value": 0.020661806680617196 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -23800,13 +25568,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.9538040075383 + "value": 81.97973547106659 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97838864579919 + "value": 99.97581405887304 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -23818,7 +25586,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -23830,7 +25598,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5811732480.0 + "value": 5813305344.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -23842,13 +25610,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 987234304.0 + "value": 986185728.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -23860,13 +25628,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 898924544.0 + "value": 897892352.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.302178074540734 + "value": 43.94469589097078 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -23878,13 +25646,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.109454869038675 + "value": 28.21707795379767 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.43351301121143393 + "value": 0.34100228484203726 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -23896,7 +25664,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.89092182989874 + "value": 94.49602145395343 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -23915,21 +25683,38 @@ "time" ], "times": { - "compilation": 15136.492, - "data": 60236.566, - "framework": 3462842.324, - "kernel_overhead": 1655265.842, - "profiling_overhead": 50161.7, - "profiling_runs": 1697178.216, + "compilation": 15978.381, + "data": 66397.71, + "framework": 3497656.438, + "kernel_overhead": 1666869.345, + "profiling_overhead": 56462.576, + "profiling_runs": 1707926.807, "runtimes": [ - 6148.032 + 5940.928 ], - "search_algorithm": 25.4, - "validation": 14.897 + "search_algorithm": 28.082, + "validation": 21.911 }, - "timestamp": "2026-01-27 09:26:10 UTC" + "timestamp": "2026-03-02 14:28:23 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", @@ -23946,61 +25731,61 @@ { "name": "time", "unit": "", - "value": 6127.008 + "value": 5914.976 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.898960656956653 + "value": 8.178480924652092 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2113656.0 + "value": 12872.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842680.0 + "value": 1831628.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.0246156641227504 + "value": 1.0751625781954461 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2215133.0 + "value": 108330.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109759.0 + "value": 2101734.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.636231290062955 + "value": 0.4415311033885332 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019877237996958046 + "value": 0.02069249537348944 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -24030,13 +25815,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.87668246169547 + "value": 81.83479691268009 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96557414596381 + "value": 99.97294182165494 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -24048,7 +25833,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -24060,7 +25845,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5811732480.0 + "value": 5813305344.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -24072,13 +25857,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 987234304.0 + "value": 986185728.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -24090,13 +25875,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 898924544.0 + "value": 897892352.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.358902700716406 + "value": 44.01085691181371 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -24108,13 +25893,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.148401680982566 + "value": 28.25980027042804 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4341358178965718 + "value": 0.3415185823696748 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -24126,7 +25911,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.02151751142915 + "value": 94.6391095724791 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -24145,21 +25930,38 @@ "time" ], "times": { - "compilation": 16280.125, - "data": 63785.982, - "framework": 3473838.756, - "kernel_overhead": 1657406.562, - "profiling_overhead": 53476.752, - "profiling_runs": 1699169.46, + "compilation": 16817.297, + "data": 63565.106, + "framework": 3494574.2929999996, + "kernel_overhead": 1668209.927, + "profiling_overhead": 53583.877, + "profiling_runs": 1709215.383, "runtimes": [ - 6127.008 + 5914.976 ], - "search_algorithm": 30.252, - "validation": 17.994 + "search_algorithm": 35.515, + "validation": 22.316 }, - "timestamp": "2026-01-27 09:26:12 UTC" + "timestamp": "2026-03-02 14:28:25 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 256 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -24176,61 +25978,61 @@ { "name": "time", "unit": "", - "value": 6867.488 + "value": 6857.76 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.17269240442656 + "value": 6.868628600996479 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098048.0 + "value": 4596.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869036.0 + "value": 1872920.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.7701471849423482 + "value": 0.9244723215811693 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2201040.0 + "value": 125510.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099173.0 + "value": 2108398.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.956233192546485 + "value": 47.984419350454 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 134742016.0 + "value": 134217728.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5597051413373055 + "value": 0.5622013927043412 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -24260,13 +26062,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.16893691135152 + "value": 65.42735586680055 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95139520899937 + "value": 99.95347979923307 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -24278,7 +26080,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8606711808.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -24302,7 +26104,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 4328521728.0 + "value": 4311744512.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -24320,13 +26122,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 529006592.0 + "value": 527958016.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.325843592569584 + "value": 21.37338247270446 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -24338,13 +26140,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.892365531390006 + "value": 23.998423536862138 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.359013295674966 + "value": 24.373398904625606 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -24356,7 +26158,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.085145702381816 + "value": 47.20040964654258 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -24375,21 +26177,38 @@ "time" ], "times": { - "compilation": 20165.277, - "data": 63513.234, - "framework": 520007.766, - "kernel_overhead": 177352.115, - "profiling_overhead": 53331.493, - "profiling_runs": 225810.924, + "compilation": 40977.46, + "data": 65573.323, + "framework": 519523.851, + "kernel_overhead": 175370.554, + "profiling_overhead": 55384.183, + "profiling_runs": 223195.791, "runtimes": [ - 6867.488 + 6857.76 ], - "search_algorithm": 31.523, - "validation": 20.895 + "search_algorithm": 31.675, + "validation": 18.574 }, - "timestamp": "2026-01-27 09:26:12 UTC" + "timestamp": "2026-03-02 14:28:25 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 256 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -24406,61 +26225,61 @@ { "name": "time", "unit": "", - "value": 6940.288 + "value": 7351.744 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.884589678712942 + "value": 7.086673721051312 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2112684.0 + "value": 9932.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1878196.0 + "value": 1871708.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.7852167277825322 + "value": 0.9204799370719983 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2228589.0 + "value": 124777.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103846.0 + "value": 2102818.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.001785978871006 + "value": 48.0113898022368 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 134742016.0 + "value": 134217728.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5602516595925187 + "value": 0.562439115081832 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -24490,13 +26309,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.39998394347539 + "value": 77.34138581322715 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96367328301893 + "value": 99.9302381097528 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -24508,7 +26327,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8606711808.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -24532,7 +26351,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 4328521728.0 + "value": 4311744512.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -24550,13 +26369,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 529006592.0 + "value": 527958016.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.344458596856413 + "value": 21.38670063458577 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -24568,13 +26387,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.91275752903074 + "value": 24.014154971926803 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.37980357451962 + "value": 24.38937614336316 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -24586,7 +26405,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.1253507782709 + "value": 47.23137145942153 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -24605,21 +26424,38 @@ "time" ], "times": { - "compilation": 22193.589, - "data": 62466.989, - "framework": 516032.46400000004, - "kernel_overhead": 177178.21, - "profiling_overhead": 51678.671, - "profiling_runs": 224708.594, + "compilation": 17472.586, + "data": 64183.494, + "framework": 516428.356, + "kernel_overhead": 174892.757, + "profiling_overhead": 54378.069, + "profiling_runs": 222974.036, "runtimes": [ - 6940.288 + 7351.744 ], - "search_algorithm": 36.99, - "validation": 18.936 + "search_algorithm": 28.259, + "validation": 23.189 }, - "timestamp": "2026-01-27 09:26:12 UTC" + "timestamp": "2026-03-02 14:28:26 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 256 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -24636,61 +26472,61 @@ { "name": "time", "unit": "", - "value": 7035.136 + "value": 6976.0 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.434834070485488 + "value": 6.644116301424306 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104288.0 + "value": 1632.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839244.0 + "value": 1838616.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.7823251522435732 + "value": 0.9146661885816736 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2218292.0 + "value": 114829.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100070.0 + "value": 2100167.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.959798828703555 + "value": 47.98473757772827 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 134742016.0 + "value": 134217728.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5597260471696622 + "value": 0.5621882105522493 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -24720,13 +26556,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.76308105356907 + "value": 65.15492998046548 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95129837778576 + "value": 99.95245140329449 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -24738,7 +26574,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8606711808.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -24762,7 +26598,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 4328521728.0 + "value": 4311744512.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -24780,13 +26616,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 529006592.0 + "value": 527958016.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.32509940290335 + "value": 21.373790140964353 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -24798,13 +26634,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.893281094732266 + "value": 23.99810774703223 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.359946741113756 + "value": 24.373078180579608 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -24816,7 +26652,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.0869494866677 + "value": 47.199792659419046 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -24835,21 +26671,38 @@ "time" ], "times": { - "compilation": 22764.635, - "data": 62352.33, - "framework": 520303.485, - "kernel_overhead": 178910.441, - "profiling_overhead": 51921.09, - "profiling_runs": 227119.624, + "compilation": 19606.853, + "data": 64840.07, + "framework": 520539.77800000005, + "kernel_overhead": 176220.746, + "profiling_overhead": 54907.267, + "profiling_runs": 224571.695, "runtimes": [ - 7035.136 + 6976.0 ], - "search_algorithm": 29.74, - "validation": 18.091 + "search_algorithm": 29.386, + "validation": 27.52 }, - "timestamp": "2026-01-27 09:26:13 UTC" + "timestamp": "2026-03-02 14:28:26 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 256 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -24866,61 +26719,61 @@ { "name": "time", "unit": "", - "value": 7026.4 + "value": 6989.568 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.814499649416014 + "value": 6.915154728243795 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2109348.0 + "value": 16336.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839112.0 + "value": 1843148.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.7798866479232682 + "value": 0.9274240800897757 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2223148.0 + "value": 133058.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100906.0 + "value": 2110037.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.003260781293925 + "value": 48.01459473779753 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 134742016.0 + "value": 134217728.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5603076699208502 + "value": 0.5624917818316483 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -24950,13 +26803,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.18955652832886 + "value": 80.66695814866613 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98279995268072 + "value": 99.93940021718852 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -24968,7 +26821,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8606711808.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -24992,7 +26845,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 4328521728.0 + "value": 4311744512.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -25010,13 +26863,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 529006592.0 + "value": 527958016.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.342378478743107 + "value": 21.387127921726254 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -25028,13 +26881,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.91057321319659 + "value": 24.01420191235312 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.377576596266838 + "value": 24.389423817233634 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -25046,7 +26899,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.12104610325507 + "value": 47.23146378249991 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -25065,21 +26918,38 @@ "time" ], "times": { - "compilation": 22548.53, - "data": 61242.125, - "framework": 515661.415, - "kernel_overhead": 177552.24, - "profiling_overhead": 50823.182, - "profiling_runs": 226043.868, + "compilation": 20670.828, + "data": 63792.536, + "framework": 519296.566, + "kernel_overhead": 176870.958, + "profiling_overhead": 53785.059, + "profiling_runs": 224848.013, "runtimes": [ - 7026.4 + 6989.568 ], - "search_algorithm": 33.434, - "validation": 20.103 + "search_algorithm": 30.822, + "validation": 23.317 }, - "timestamp": "2026-01-27 09:26:13 UTC" + "timestamp": "2026-03-02 14:28:26 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 256 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -25096,61 +26966,61 @@ { "name": "time", "unit": "", - "value": 6908.288 + "value": 7272.128 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.736864546261222 + "value": 7.0130426458042505 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2110940.0 + "value": 11380.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1843048.0 + "value": 1840548.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.786935671223272 + "value": 0.9246018763390682 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2227946.0 + "value": 123697.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104044.0 + "value": 2101652.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.258697936058596 + "value": 48.271523290418806 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 134742016.0 + "value": 134217728.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5631868985714454 + "value": 0.5655291938395839 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -25180,13 +27050,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.46661833729829 + "value": 93.94260047624633 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96855575264703 + "value": 99.96757013686113 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -25198,7 +27068,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8606711808.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -25222,7 +27092,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 4328521728.0 + "value": 4311744512.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -25240,13 +27110,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 529006592.0 + "value": 527958016.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.454862972783815 + "value": 21.497111321867386 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -25258,13 +27128,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.03686588394612 + "value": 24.1370732236344 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.506335920741947 + "value": 24.51421499275369 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -25276,7 +27146,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.36997300150607 + "value": 47.47316833723221 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -25295,21 +27165,38 @@ "time" ], "times": { - "compilation": 22449.403, - "data": 61344.614, - "framework": 512588.283, - "kernel_overhead": 176204.886, - "profiling_overhead": 51150.698, - "profiling_runs": 223888.085, + "compilation": 23126.145, + "data": 64273.489, + "framework": 518086.995, + "kernel_overhead": 175597.522, + "profiling_overhead": 54436.64, + "profiling_runs": 223779.344, "runtimes": [ - 6908.288 + 7272.128 ], - "search_algorithm": 36.889, - "validation": 16.793 + "search_algorithm": 44.582, + "validation": 25.374 }, - "timestamp": "2026-01-27 09:26:13 UTC" + "timestamp": "2026-03-02 14:28:27 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 39 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -25326,61 +27213,61 @@ { "name": "time", "unit": "", - "value": 3715.424 + "value": 3609.792 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.296723002513936 + "value": 13.67273794716617 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2106580.0 + "value": 10984.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869920.0 + "value": 1874480.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.322192565968719 + "value": 1.7219822615106954 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2154956.0 + "value": 76147.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100456.0 + "value": 2102301.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.71266666468405 + "value": 45.68627062936657 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67633152.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5313440506238389 + "value": 0.5352176001133331 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -25410,13 +27297,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 78.89500753202125 + "value": 73.27188227072129 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9167036894262 + "value": 99.90380375730335 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -25428,7 +27315,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6459228160.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -25452,7 +27339,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2181038080.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -25470,13 +27357,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 335020032.0 + "value": 333971456.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.849465048786982 + "value": 28.973583641123852 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -25488,13 +27375,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.37915815107689 + "value": 45.7158788340919 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.309997253385202 + "value": 23.30438354628513 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -25506,7 +27393,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.635887066146736 + "value": 56.87755275685799 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -25525,21 +27412,38 @@ "time" ], "times": { - "compilation": 22823.578, - "data": 61027.107, - "framework": 333238.067, - "kernel_overhead": 95353.359, - "profiling_overhead": 50751.99, - "profiling_runs": 126105.611, + "compilation": 69863.99, + "data": 62028.658, + "framework": 332236.194, + "kernel_overhead": 93690.086, + "profiling_overhead": 52143.289, + "profiling_runs": 124374.161, "runtimes": [ - 3715.424 + 3609.792 ], - "search_algorithm": 38.825, - "validation": 16.983 + "search_algorithm": 36.286, + "validation": 30.05 }, - "timestamp": "2026-01-27 09:26:14 UTC" + "timestamp": "2026-03-02 14:28:27 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 39 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -25556,61 +27460,61 @@ { "name": "time", "unit": "", - "value": 3689.728 + "value": 3731.328 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.246607855827605 + "value": 13.566689944134078 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097792.0 + "value": 13832.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868556.0 + "value": 1882292.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3196487064455327 + "value": 1.7533247092169362 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2149422.0 + "value": 78103.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098982.0 + "value": 2142399.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.746751146194235 + "value": 45.731449886225505 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67633152.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5317641501794482 + "value": 0.5357083301817018 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -25640,13 +27544,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.8569839203346 + "value": 91.99812484763592 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95462337673156 + "value": 99.9389231183016 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -25658,7 +27562,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6459228160.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -25682,7 +27586,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2181038080.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -25700,13 +27604,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 335020032.0 + "value": 333971456.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.861640347759447 + "value": 28.99049548125157 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -25718,13 +27622,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.39780747404921 + "value": 45.74171512207048 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.319576886083873 + "value": 23.31755399777421 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -25736,7 +27640,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.65918874348026 + "value": 56.909727556666276 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -25755,21 +27659,38 @@ "time" ], "times": { - "compilation": 22256.442, - "data": 60527.034, - "framework": 329791.705, - "kernel_overhead": 94072.858, - "profiling_overhead": 50387.787, - "profiling_runs": 124804.026, + "compilation": 26875.376, + "data": 62327.017, + "framework": 334778.735, + "kernel_overhead": 94815.817, + "profiling_overhead": 52303.654, + "profiling_runs": 125332.247, "runtimes": [ - 3689.728 + 3731.328 ], - "search_algorithm": 30.34, - "validation": 16.898 + "search_algorithm": 36.55, + "validation": 25.646 }, - "timestamp": "2026-01-27 09:26:14 UTC" + "timestamp": "2026-03-02 14:28:27 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 39 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -25786,61 +27707,61 @@ { "name": "time", "unit": "", - "value": 3647.744 + "value": 3612.32 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.12676463623447 + "value": 13.333346870010768 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2109592.0 + "value": 884.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1843312.0 + "value": 1837744.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.339423251173719 + "value": 1.7060066325294407 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2174930.0 + "value": 61013.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102892.0 + "value": 2097343.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.7150028910367 + "value": 45.68644386944465 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67633152.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5313036163117085 + "value": 0.5352252640437013 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -25870,13 +27791,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 78.39865865637181 + "value": 74.35289880582513 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.8961127169075 + "value": 99.89440637358206 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -25888,7 +27809,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6459228160.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -25912,7 +27833,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2181038080.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -25930,13 +27851,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 335020032.0 + "value": 333971456.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.85161600364488 + "value": 28.976198239327367 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -25948,13 +27869,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.38505789551006 + "value": 45.72083415187201 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.31302778617021 + "value": 23.30690959695038 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -25966,7 +27887,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.643248438128815 + "value": 56.883718612170966 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -25985,21 +27906,38 @@ "time" ], "times": { - "compilation": 24698.122, - "data": 62071.893, - "framework": 333937.608, - "kernel_overhead": 95095.053, - "profiling_overhead": 51044.098, - "profiling_runs": 125726.564, + "compilation": 28474.59, + "data": 61982.488, + "framework": 335090.447, + "kernel_overhead": 95164.305, + "profiling_overhead": 52130.775, + "profiling_runs": 125812.879, "runtimes": [ - 3647.744 + 3612.32 ], - "search_algorithm": 33.455, - "validation": 21.326 + "search_algorithm": 38.232, + "validation": 24.424 }, - "timestamp": "2026-01-27 09:26:14 UTC" + "timestamp": "2026-03-02 14:28:27 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 39 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -26016,61 +27954,61 @@ { "name": "time", "unit": "", - "value": 3874.4 + "value": 3608.064 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.439386015666727 + "value": 13.342654873834034 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2105984.0 + "value": 440.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839668.0 + "value": 1837832.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3493587572195014 + "value": 1.710952072827599 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2169775.0 + "value": 62018.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101461.0 + "value": 2099165.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.744860923097875 + "value": 45.73453935989875 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67633152.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.531707833944502 + "value": 0.5357120377771154 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -26100,13 +28038,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.77501175740768 + "value": 91.89972194256015 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9468947646434 + "value": 99.93868989785754 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -26118,7 +28056,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6459228160.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -26142,7 +28080,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2181038080.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -26160,13 +28098,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 335020032.0 + "value": 333971456.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.86008237705141 + "value": 28.990891918773894 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -26178,13 +28116,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.396509753279005 + "value": 45.74213844211486 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.31891028342261 + "value": 23.317769791781206 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -26196,7 +28134,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.65756911006299 + "value": 56.910254231897625 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -26215,21 +28153,38 @@ "time" ], "times": { - "compilation": 24153.813, - "data": 61606.659, - "framework": 331461.613, - "kernel_overhead": 93772.547, - "profiling_overhead": 50944.498, - "profiling_runs": 125137.909, + "compilation": 29599.36, + "data": 61802.434, + "framework": 331777.004, + "kernel_overhead": 93760.133, + "profiling_overhead": 51903.502, + "profiling_runs": 124310.935, "runtimes": [ - 3874.4 + 3608.064 ], - "search_algorithm": 33.304, - "validation": 19.688 + "search_algorithm": 41.12, + "validation": 25.888 }, - "timestamp": "2026-01-27 09:26:14 UTC" + "timestamp": "2026-03-02 14:28:28 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 39 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -26246,61 +28201,61 @@ { "name": "time", "unit": "", - "value": 3669.792 + "value": 3614.752 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.18861177333578 + "value": 13.338660453554136 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097848.0 + "value": 6928.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837192.0 + "value": 1840628.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3446875923765362 + "value": 1.7196334912365066 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2156893.0 + "value": 68891.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098973.0 + "value": 2106478.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.74793350461189 + "value": 45.724493631838364 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67633152.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5317793961557972 + "value": 0.5356029333283208 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -26330,13 +28285,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.6212967145825 + "value": 92.71807619170482 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94871621072991 + "value": 99.92591712140666 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -26348,7 +28303,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6459228160.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -26372,7 +28327,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2181038080.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -26390,13 +28345,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 335020032.0 + "value": 333971456.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.864332642636192 + "value": 28.988921838549814 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -26408,13 +28363,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.40179223141415 + "value": 45.738668166024354 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.32162374387094 + "value": 23.31600076432101 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -26426,7 +28381,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.66419950779592 + "value": 56.90597450133772 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -26445,21 +28400,38 @@ "time" ], "times": { - "compilation": 25614.132, - "data": 60135.216, - "framework": 329853.397, - "kernel_overhead": 94765.643, - "profiling_overhead": 50065.833, - "profiling_runs": 124886.705, + "compilation": 29521.183, + "data": 61763.649, + "framework": 333966.066, + "kernel_overhead": 94999.219, + "profiling_overhead": 51639.092, + "profiling_runs": 125564.106, "runtimes": [ - 3669.792 + 3614.752 ], - "search_algorithm": 36.866, - "validation": 21.569 + "search_algorithm": 40.285, + "validation": 25.714 }, - "timestamp": "2026-01-27 09:26:14 UTC" + "timestamp": "2026-03-02 14:28:28 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -26476,61 +28448,61 @@ { "name": "time", "unit": "", - "value": 2227.104 + "value": 2158.432 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 47.40238129748863 + "value": 22.589428765097363 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103052.0 + "value": 5176.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1873712.0 + "value": 1871720.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.527479340741403 + "value": 2.813253982496849 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2127325.0 + "value": 44059.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2107031.0 + "value": 2100164.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.377380783446675 + "value": 37.922619458386436 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.442455252175729 + "value": 0.4441424780009324 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -26560,13 +28532,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.59439773776062 + "value": 95.7460904306492 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90735344885012 + "value": 99.90196987246175 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -26578,7 +28550,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5385486336.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -26602,7 +28574,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -26620,13 +28592,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 229376000.0 + "value": 228327424.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.216044764767446 + "value": 43.23505892934158 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -26638,13 +28610,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.58238751329891 + "value": 75.87469630705151 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.70751705669024 + "value": 19.487348758549363 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -26656,7 +28628,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.58558229163997 + "value": 64.53897290293878 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -26675,21 +28647,38 @@ "time" ], "times": { - "compilation": 24745.366, - "data": 67331.57, - "framework": 220604.3, - "kernel_overhead": 36462.417, - "profiling_overhead": 57099.593, - "profiling_runs": 59710.72, + "compilation": 89996.223, + "data": 61670.272, + "framework": 207894.32799999998, + "kernel_overhead": 35663.58, + "profiling_overhead": 51169.338, + "profiling_runs": 59391.138, "runtimes": [ - 2227.104 + 2158.432 ], - "search_algorithm": 35.002, - "validation": 21.751 + "search_algorithm": 39.581, + "validation": 27.727 }, - "timestamp": "2026-01-27 09:26:15 UTC" + "timestamp": "2026-03-02 14:28:28 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -26706,61 +28695,61 @@ { "name": "time", "unit": "", - "value": 2243.2 + "value": 2198.208 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 46.99424404490973 + "value": 22.41712786259542 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097380.0 + "value": 5464.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1867280.0 + "value": 1873988.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.485041632510392 + "value": 2.822642145390205 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2118814.0 + "value": 46225.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098982.0 + "value": 2104490.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.37824776154798 + "value": 37.92319592697868 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.44253870930711525 + "value": 0.44413108047409044 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -26790,13 +28779,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.11724651586354 + "value": 94.78152050249173 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9125433573501 + "value": 99.90282656018074 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -26808,7 +28797,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5385486336.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -26832,7 +28821,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -26850,13 +28839,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 229376000.0 + "value": 228327424.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.22219377794553 + "value": 43.23406626030107 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -26868,13 +28857,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.59271724100114 + "value": 75.87209859564655 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.710210452487605 + "value": 19.48668157290531 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -26886,7 +28875,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.59446091831055 + "value": 64.53681868965265 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -26905,21 +28894,38 @@ "time" ], "times": { - "compilation": 24818.291, - "data": 60824.515, - "framework": 206011.906, - "kernel_overhead": 35679.019, - "profiling_overhead": 50414.61, - "profiling_runs": 59093.762, + "compilation": 30375.567, + "data": 62044.227, + "framework": 209254.79, + "kernel_overhead": 36200.66, + "profiling_overhead": 51449.713, + "profiling_runs": 59560.19, "runtimes": [ - 2243.2 + 2198.208 ], - "search_algorithm": 37.835, - "validation": 24.075 + "search_algorithm": 38.897, + "validation": 28.591 }, - "timestamp": "2026-01-27 09:26:15 UTC" + "timestamp": "2026-03-02 14:28:28 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -26936,61 +28942,61 @@ { "name": "time", "unit": "", - "value": 2286.592 + "value": 2221.344 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 46.21789486317811 + "value": 21.860583510962243 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103584.0 + "value": 604.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839712.0 + "value": 1837224.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.492701716602163 + "value": 2.786079819724869 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2141445.0 + "value": 37488.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100481.0 + "value": 2097435.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.393860933541916 + "value": 37.92201729243877 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.44275779953913624 + "value": 0.4440923974565519 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -27020,13 +29026,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.34868329403355 + "value": 95.05945442013373 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92449524511885 + "value": 99.89472857797642 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -27038,7 +29044,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5385486336.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -27062,7 +29068,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -27080,13 +29086,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 229376000.0 + "value": 228327424.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.238428726487285 + "value": 43.23386730387939 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -27098,13 +29104,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.62109531066534 + "value": 75.87164032059628 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.717609812448874 + "value": 19.48656387140315 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -27116,7 +29122,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.61865719896913 + "value": 64.53637828756918 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -27135,21 +29141,38 @@ "time" ], "times": { - "compilation": 25016.607, - "data": 60750.75, - "framework": 205474.736, - "kernel_overhead": 35492.608, - "profiling_overhead": 50324.931, - "profiling_runs": 58906.447, + "compilation": 30133.442, + "data": 61238.729, + "framework": 206991.187, + "kernel_overhead": 35625.614, + "profiling_overhead": 50699.321, + "profiling_runs": 59427.523, "runtimes": [ - 2286.592 + 2221.344 ], - "search_algorithm": 31.257, - "validation": 19.702 + "search_algorithm": 40.259, + "validation": 25.103 }, - "timestamp": "2026-01-27 09:26:15 UTC" + "timestamp": "2026-03-02 14:28:28 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -27166,61 +29189,61 @@ { "name": "time", "unit": "", - "value": 2227.776 + "value": 2208.672 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 45.97497912130522 + "value": 22.075487410788128 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103360.0 + "value": 1348.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842632.0 + "value": 1843640.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.470672578879817 + "value": 2.8358902756560203 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2142422.0 + "value": 37998.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100630.0 + "value": 2120927.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.39571321899251 + "value": 37.923868424383805 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.4427572012902221 + "value": 0.4441155101504952 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -27250,13 +29273,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.66435442012794 + "value": 95.04745675686571 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92053861901766 + "value": 99.89049050720264 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -27268,7 +29291,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5385486336.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -27292,7 +29315,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -27310,13 +29333,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 229376000.0 + "value": 228327424.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.240432905694384 + "value": 43.23719507818382 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -27328,13 +29351,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.6239875517271 + "value": 75.87880823038496 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.718363941710095 + "value": 19.488404848233635 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -27346,7 +29369,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.62118159905617 + "value": 64.54252590598912 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -27365,21 +29388,38 @@ "time" ], "times": { - "compilation": 25105.473, - "data": 60110.826, - "framework": 204843.752, - "kernel_overhead": 35675.296, - "profiling_overhead": 50109.825, - "profiling_runs": 58947.805, + "compilation": 29996.506, + "data": 61573.195, + "framework": 206753.488, + "kernel_overhead": 35366.118, + "profiling_overhead": 50734.227, + "profiling_runs": 59079.948, "runtimes": [ - 2227.776 + 2208.672 ], - "search_algorithm": 35.152, - "validation": 20.563 + "search_algorithm": 41.103, + "validation": 28.744 }, - "timestamp": "2026-01-27 09:26:15 UTC" + "timestamp": "2026-03-02 14:28:29 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -27396,61 +29436,61 @@ { "name": "time", "unit": "", - "value": 2224.416 + "value": 2197.568 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 46.7203986377596 + "value": 21.848432669576287 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097352.0 + "value": 5252.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836072.0 + "value": 1840168.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.514706955445585 + "value": 2.831065951788857 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2132920.0 + "value": 40832.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098982.0 + "value": 2102356.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.462372347784104 + "value": 37.98765388840588 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.4434512182909192 + "value": 0.4447545082156342 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -27480,13 +29520,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.34065665184595 + "value": 95.47758596609326 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9087959345794 + "value": 99.87374468284366 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -27498,7 +29538,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5385486336.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -27522,7 +29562,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -27540,13 +29580,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 229376000.0 + "value": 228327424.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.31259001296101 + "value": 43.307155872731926 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -27558,13 +29598,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.75142963842802 + "value": 76.00072435771385 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.751593470176058 + "value": 19.519717291092523 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -27576,7 +29616,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.73008167818027 + "value": 64.64622778257359 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -27595,21 +29635,38 @@ "time" ], "times": { - "compilation": 26077.985, - "data": 61222.884, - "framework": 206775.74099999998, - "kernel_overhead": 35799.381, - "profiling_overhead": 50509.746, - "profiling_runs": 59243.73, + "compilation": 29907.214, + "data": 61476.263, + "framework": 207324.82400000002, + "kernel_overhead": 35645.678, + "profiling_overhead": 51069.523, + "profiling_runs": 59133.36, "runtimes": [ - 2224.416 + 2197.568 ], - "search_algorithm": 35.211, - "validation": 22.052 + "search_algorithm": 38.556, + "validation": 26.658 }, - "timestamp": "2026-01-27 09:26:15 UTC" + "timestamp": "2026-03-02 14:28:29 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -27626,61 +29683,61 @@ { "name": "time", "unit": "", - "value": 2003.52 + "value": 1904.224 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 52.989339593596064 + "value": 25.728481407866038 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098056.0 + "value": 336.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1867344.0 + "value": 1871940.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.224973343480999 + "value": 3.2027139801926654 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2117144.0 + "value": 34742.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099110.0 + "value": 2102529.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.09051461623991 + "value": 21.532818952394678 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2509218898412472 + "value": 0.2521254360977023 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -27710,13 +29767,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.7580262439909 + "value": 98.03762980334112 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9140180807518 + "value": 99.89752210534589 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -27728,7 +29785,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4848615424.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -27752,7 +29809,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -27770,13 +29827,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 193331200.0 + "value": 192282624.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.57270160426023 + "value": 43.61988335619086 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -27788,13 +29845,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.72171023748695 + "value": 86.14709725291948 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.510483552396929 + "value": 11.231091292250733 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -27806,7 +29863,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 61.73916838327478 + "value": 61.70903862595839 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -27825,21 +29882,38 @@ "time" ], "times": { - "compilation": 26382.749, - "data": 59961.94, - "framework": 199872.67299999998, - "kernel_overhead": 34111.715, - "profiling_overhead": 49746.292, - "profiling_runs": 56052.726, + "compilation": 92478.145, + "data": 60671.582, + "framework": 203472.038, + "kernel_overhead": 34987.875, + "profiling_overhead": 50519.939, + "profiling_runs": 57292.642, "runtimes": [ - 2003.52 + 1904.224 ], - "search_algorithm": 35.301, - "validation": 19.471 + "search_algorithm": 37.246, + "validation": 27.83 }, - "timestamp": "2026-01-27 09:26:15 UTC" + "timestamp": "2026-03-02 14:28:29 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -27856,61 +29930,61 @@ { "name": "time", "unit": "", - "value": 1950.496 + "value": 1938.496 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 53.14600220603325 + "value": 25.44638197947469 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2105652.0 + "value": 3116.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872288.0 + "value": 1869412.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.252448325682525 + "value": 3.181874046185721 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2124781.0 + "value": 35622.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101420.0 + "value": 2098581.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.09142455059159 + "value": 21.53388770603989 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2508727105798032 + "value": 0.2522032378807293 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -27940,13 +30014,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.18714709197381 + "value": 97.64385334531833 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98014536856779 + "value": 99.92203311768397 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -27958,7 +30032,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4848615424.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -27982,7 +30056,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -28000,13 +30074,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 193331200.0 + "value": 192282624.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.53508996036249 + "value": 43.62319308319583 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -28018,13 +30092,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.64822368371345 + "value": 86.1525423145676 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.500615973154883 + "value": 11.231801170893336 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -28036,7 +30110,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 61.68629874769482 + "value": 61.7129900719678 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -28055,21 +30129,38 @@ "time" ], "times": { - "compilation": 27289.909, - "data": 60155.051, - "framework": 199419.195, - "kernel_overhead": 33850.831, - "profiling_overhead": 49801.982, - "profiling_runs": 55611.331, + "compilation": 29357.396, + "data": 61101.375, + "framework": 203570.788, + "kernel_overhead": 34707.733, + "profiling_overhead": 50720.061, + "profiling_runs": 57041.619, "runtimes": [ - 1950.496 + 1938.496 ], - "search_algorithm": 31.356, - "validation": 20.346 + "search_algorithm": 39.914, + "validation": 36.627 }, - "timestamp": "2026-01-27 09:26:16 UTC" + "timestamp": "2026-03-02 14:28:29 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -28086,61 +30177,61 @@ { "name": "time", "unit": "", - "value": 1953.536 + "value": 1936.8 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 52.78193121058404 + "value": 24.807319599147416 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2102712.0 + "value": 6712.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839600.0 + "value": 1840572.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.257521543565299 + "value": 3.1882705726073115 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2137877.0 + "value": 39513.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103722.0 + "value": 2102964.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.116092669519464 + "value": 21.5335759407197 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2511858145427756 + "value": 0.25216783690355626 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -28170,13 +30261,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.23324278305016 + "value": 97.90255884991794 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93503354765622 + "value": 99.91015507562675 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -28188,7 +30279,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4848615424.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -28212,7 +30303,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -28230,13 +30321,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 193331200.0 + "value": 192282624.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.60976235749119 + "value": 43.621865086697966 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -28248,13 +30339,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.79382857068634 + "value": 86.1506903523413 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.520167410614622 + "value": 11.231559728552307 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -28266,7 +30357,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 61.79111513352752 + "value": 61.7116031340357 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -28285,21 +30376,38 @@ "time" ], "times": { - "compilation": 26340.387, - "data": 60031.96, - "framework": 198818.101, - "kernel_overhead": 33506.718, - "profiling_overhead": 49992.767, - "profiling_runs": 55286.656, + "compilation": 30126.4, + "data": 60269.897, + "framework": 201872.278, + "kernel_overhead": 34666.708, + "profiling_overhead": 49775.628, + "profiling_runs": 57160.045, "runtimes": [ - 1953.536 + 1936.8 ], - "search_algorithm": 31.173, - "validation": 20.136 + "search_algorithm": 42.883, + "validation": 26.109 }, - "timestamp": "2026-01-27 09:26:16 UTC" + "timestamp": "2026-03-02 14:28:29 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -28316,61 +30424,61 @@ { "name": "time", "unit": "", - "value": 2023.871 + "value": 1958.016 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 52.42745250740476 + "value": 24.60400787818716 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103036.0 + "value": 3324.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839240.0 + "value": 1839528.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.238501887956158 + "value": 3.1523016100213512 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2136117.0 + "value": 36144.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103552.0 + "value": 2100650.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.117256060330014 + "value": 21.533421166963688 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.251195904492815 + "value": 0.2521724684838284 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -28400,13 +30508,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.8267354515749 + "value": 97.60024435546994 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93449815860195 + "value": 99.90478535891327 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -28418,7 +30526,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4848615424.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -28442,7 +30550,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -28460,13 +30568,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 193331200.0 + "value": 192282624.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.61172923594635 + "value": 43.62444059290934 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -28478,13 +30586,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.79773449618698 + "value": 86.15690322866071 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.520691887915731 + "value": 11.232369708033403 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -28496,7 +30604,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 61.79398070824909 + "value": 61.71611389212334 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -28515,21 +30623,38 @@ "time" ], "times": { - "compilation": 26429.369, - "data": 60229.799, - "framework": 197830.326, - "kernel_overhead": 33103.665, - "profiling_overhead": 49654.386, - "profiling_runs": 54842.476, + "compilation": 30348.25, + "data": 60567.716, + "framework": 202683.219, + "kernel_overhead": 34760.182, + "profiling_overhead": 50262.53, + "profiling_runs": 57092.791, "runtimes": [ - 2023.871 + 1958.016 ], - "search_algorithm": 31.454, - "validation": 16.638 + "search_algorithm": 42.363, + "validation": 27.339 }, - "timestamp": "2026-01-27 09:26:16 UTC" + "timestamp": "2026-03-02 14:28:29 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -28546,61 +30671,61 @@ { "name": "time", "unit": "", - "value": 1929.536 + "value": 1918.336 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 53.96121589155444 + "value": 25.042953273788836 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098852.0 + "value": 5268.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839816.0 + "value": 1839432.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.3646714392350034 + "value": 3.2372357191619425 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2131155.0 + "value": 36020.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102992.0 + "value": 2100259.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.388302530979452 + "value": 21.790411282645778 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2542666957186231 + "value": 0.2550459422051532 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -28630,13 +30755,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.14930458832873 + "value": 97.06383546105208 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91003935722753 + "value": 99.84282590146381 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -28648,7 +30773,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4848615424.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -28672,7 +30797,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -28690,13 +30815,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 193331200.0 + "value": 192282624.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.155501564302995 + "value": 44.1497515658731 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -28708,13 +30833,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.86784567762422 + "value": 87.19272598708497 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.664383574876297 + "value": 11.367411053980314 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -28726,7 +30851,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 62.5647053677021 + "value": 62.458096866619314 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -28745,21 +30870,38 @@ "time" ], "times": { - "compilation": 25970.017, - "data": 60772.455, - "framework": 201157.421, - "kernel_overhead": 34227.313, - "profiling_overhead": 50280.887, - "profiling_runs": 55876.766, + "compilation": 30195.478, + "data": 59900.917, + "framework": 204887.81999999998, + "kernel_overhead": 36456.096, + "profiling_overhead": 49656.147, + "profiling_runs": 58874.66, "runtimes": [ - 1929.536 + 1918.336 ], - "search_algorithm": 37.294, - "validation": 21.727 + "search_algorithm": 39.279, + "validation": 30.274 }, - "timestamp": "2026-01-27 09:26:16 UTC" + "timestamp": "2026-03-02 14:28:30 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -28776,61 +30918,61 @@ { "name": "time", "unit": "", - "value": 1850.304 + "value": 1792.16 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 56.70493259579457 + "value": 27.42160030298496 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098060.0 + "value": 2076.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1864644.0 + "value": 1874632.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.621156553024347 + "value": 3.3991143332631943 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2117161.0 + "value": 32389.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100312.0 + "value": 2099677.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.047590497715385 + "value": 11.450524359286334 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1327849614115318 + "value": 0.1340715561105501 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -28860,13 +31002,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.92352878534302 + "value": 98.22226826586738 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91938234407385 + "value": 99.89474515439014 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -28878,7 +31020,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4580179968.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -28902,7 +31044,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -28920,13 +31062,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 195854336.0 + "value": 194805760.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.58570409871653 + "value": 37.78117175859359 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -28938,13 +31080,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.72100413724048 + "value": 91.6226195515549 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.445266651351803 + "value": 6.151420990399805 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -28956,7 +31098,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.19269265360039 + "value": 66.49300398055207 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -28975,21 +31117,38 @@ "time" ], "times": { - "compilation": 27272.074, - "data": 59560.217, - "framework": 203118.783, - "kernel_overhead": 36352.0, - "profiling_overhead": 49592.773, - "profiling_runs": 57613.793, + "compilation": 109746.204, + "data": 59160.057, + "framework": 207316.725, + "kernel_overhead": 38741.543, + "profiling_overhead": 49074.702, + "profiling_runs": 60340.423, "runtimes": [ - 1850.304 + 1792.16 ], - "search_algorithm": 35.902, - "validation": 20.3 + "search_algorithm": 39.925, + "validation": 29.315 }, - "timestamp": "2026-01-27 09:26:16 UTC" + "timestamp": "2026-03-02 14:28:30 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -29006,61 +31165,61 @@ { "name": "time", "unit": "", - "value": 1860.256 + "value": 1823.776 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 56.38355624954477 + "value": 26.992800324077482 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097352.0 + "value": 3888.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1865988.0 + "value": 1872496.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.579030095773924 + "value": 3.387804347016584 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2115651.0 + "value": 35765.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098982.0 + "value": 2103570.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.0492683880869 + "value": 11.44987121602905 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.132781589521406 + "value": 0.13408258671148598 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -29090,13 +31249,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.47793041624257 + "value": 98.15423117489877 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.85297434866729 + "value": 99.90367860726951 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -29108,7 +31267,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4580179968.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -29132,7 +31291,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -29150,13 +31309,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 195854336.0 + "value": 194805760.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.610881970169146 + "value": 37.78100318369529 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -29168,13 +31327,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.77903357867241 + "value": 91.62196408022353 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.449389348484782 + "value": 6.151376982925164 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -29186,7 +31345,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.23509485267606 + "value": 66.49258562883733 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -29205,21 +31364,38 @@ "time" ], "times": { - "compilation": 27141.27, - "data": 59344.323, - "framework": 203791.869, - "kernel_overhead": 36962.238, - "profiling_overhead": 49314.885, - "profiling_runs": 58170.423, + "compilation": 30993.851, + "data": 59401.159, + "framework": 206681.46600000001, + "kernel_overhead": 38018.554, + "profiling_overhead": 49288.255, + "profiling_runs": 59973.498, "runtimes": [ - 1860.256 + 1823.776 ], - "search_algorithm": 34.278, - "validation": 18.574 + "search_algorithm": 37.513, + "validation": 26.084 }, - "timestamp": "2026-01-27 09:26:16 UTC" + "timestamp": "2026-03-02 14:28:30 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -29236,61 +31412,61 @@ { "name": "time", "unit": "", - "value": 1862.848 + "value": 1844.992 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 55.84642876568279 + "value": 26.483211527854383 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103644.0 + "value": 6104.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839096.0 + "value": 1837636.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.628668806407304 + "value": 3.365114432125456 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2135125.0 + "value": 36371.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100518.0 + "value": 2099035.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.066148826588194 + "value": 11.449687274077688 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.13298145770100944 + "value": 0.1340634086756307 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -29320,13 +31496,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.14210093587774 + "value": 98.19684405698497 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90399244414682 + "value": 99.88597402772389 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -29338,7 +31514,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4580179968.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -29362,7 +31538,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -29380,13 +31556,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 195854336.0 + "value": 194805760.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.647643915688064 + "value": 37.78116105674005 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -29398,13 +31574,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.86924980298042 + "value": 91.62509672995915 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.455798753092604 + "value": 6.151587304867863 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -29416,7 +31592,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.30085837249112 + "value": 66.49480446631371 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -29435,21 +31611,38 @@ "time" ], "times": { - "compilation": 27034.197, - "data": 59473.151, - "framework": 204077.20299999998, - "kernel_overhead": 36884.17, - "profiling_overhead": 49599.195, - "profiling_runs": 58120.687, + "compilation": 30148.364, + "data": 60092.571, + "framework": 207440.613, + "kernel_overhead": 37797.212, + "profiling_overhead": 49782.627, + "profiling_runs": 59768.203, "runtimes": [ - 1862.848 + 1844.992 ], - "search_algorithm": 46.506, - "validation": 19.081 + "search_algorithm": 44.891, + "validation": 28.315 }, - "timestamp": "2026-01-27 09:26:17 UTC" + "timestamp": "2026-03-02 14:28:30 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -29466,61 +31659,61 @@ { "name": "time", "unit": "", - "value": 1914.08 + "value": 1828.512 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 56.176507480386796 + "value": 26.622200491936677 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2101348.0 + "value": 2972.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839816.0 + "value": 1839608.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.643576139243586 + "value": 3.3868634097717 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2134610.0 + "value": 33687.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103604.0 + "value": 2101367.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.066370006225496 + "value": 11.450273033319613 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1329963831867375 + "value": 0.13408713699737784 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -29550,13 +31743,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.0469428685529 + "value": 98.0929713527553 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92014284502079 + "value": 99.90058893639264 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -29568,7 +31761,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4580179968.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -29592,7 +31785,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -29610,13 +31803,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 195854336.0 + "value": 194805760.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.645817787760194 + "value": 37.782603567084564 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -29628,13 +31821,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.86475960070864 + "value": 91.6279071339196 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.455479747023 + "value": 6.1517759916572 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -29646,7 +31839,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.29764311935253 + "value": 66.49689866677261 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -29665,21 +31858,38 @@ "time" ], "times": { - "compilation": 27235.084, - "data": 60066.886, - "framework": 204979.579, - "kernel_overhead": 36818.946, - "profiling_overhead": 49976.633, - "profiling_runs": 58117.114, + "compilation": 30704.32, + "data": 60330.934, + "framework": 209121.563, + "kernel_overhead": 38366.922, + "profiling_overhead": 49953.125, + "profiling_runs": 60470.582, "runtimes": [ - 1914.08 + 1828.512 ], - "search_algorithm": 44.085, - "validation": 19.047 + "search_algorithm": 42.955, + "validation": 29.95 }, - "timestamp": "2026-01-27 09:26:17 UTC" + "timestamp": "2026-03-02 14:28:30 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 40 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -29696,61 +31906,61 @@ { "name": "time", "unit": "", - "value": 1808.512 + "value": 1805.824 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 56.98366998221827 + "value": 27.0876660957615 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098652.0 + "value": 5048.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839184.0 + "value": 1837156.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.779175735349016 + "value": 3.4424506763410343 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2127989.0 + "value": 33513.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099302.0 + "value": 2100061.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.399056169437102 + "value": 11.651104976097383 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.13666947131225715 + "value": 0.136445220042346 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -29780,13 +31990,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.96302412145143 + "value": 98.01774670470641 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92767865434847 + "value": 99.89643597656713 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -29798,7 +32008,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4580179968.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -29822,7 +32032,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -29840,13 +32050,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 195854336.0 + "value": 194805760.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.68290873312754 + "value": 38.44908709782345 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -29858,13 +32068,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.36721684345278 + "value": 93.24316992726122 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.633266626329287 + "value": 6.260222590331257 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -29876,7 +32086,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 68.12351763904022 + "value": 67.66914636075914 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -29895,21 +32105,38 @@ "time" ], "times": { - "compilation": 27091.516, - "data": 60290.713, - "framework": 204693.569, - "kernel_overhead": 36979.389, - "profiling_overhead": 49614.866, - "profiling_runs": 57808.601, + "compilation": 31641.172, + "data": 60361.83, + "framework": 207764.54700000002, + "kernel_overhead": 37955.322, + "profiling_overhead": 50198.386, + "profiling_runs": 59249.009, "runtimes": [ - 1808.512 + 1805.824 ], - "search_algorithm": 33.352, - "validation": 19.011 + "search_algorithm": 43.879, + "validation": 28.031 }, - "timestamp": "2026-01-27 09:26:17 UTC" + "timestamp": "2026-03-02 14:28:31 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -29926,61 +32153,61 @@ { "name": "time", "unit": "", - "value": 1800.0 + "value": 1731.008 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 58.76478833820349 + "value": 28.368289565419474 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2102340.0 + "value": 5280.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1861848.0 + "value": 1869696.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.867946913635508 + "value": 3.5243203511586136 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2122094.0 + "value": 33399.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100198.0 + "value": 2101524.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.642277354809982 + "value": 5.930921872357288 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06917968477303793 + "value": 0.06942355623522745 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -30010,13 +32237,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.36604452961046 + "value": 81.71821866563016 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92184948970974 + "value": 99.87840107911596 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -30028,7 +32255,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4445962240.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -30052,7 +32279,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -30070,13 +32297,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 167002112.0 + "value": 165953536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.67390624401305 + "value": 37.640052855807525 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -30088,13 +32315,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.52720310170822 + "value": 94.90169488337604 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.727085766827607 + "value": 3.371141749397269 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -30106,7 +32333,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.80963495285534 + "value": 58.67191342912649 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -30125,21 +32352,38 @@ "time" ], "times": { - "compilation": 27329.878, - "data": 59851.29, - "framework": 180147.81600000002, - "kernel_overhead": 24813.556, - "profiling_overhead": 49774.189, - "profiling_runs": 45708.781, + "compilation": 117306.395, + "data": 60327.551, + "framework": 183504.32499999998, + "kernel_overhead": 26065.145, + "profiling_overhead": 49733.496, + "profiling_runs": 47378.133, "runtimes": [ - 1800.0 + 1731.008 ], - "search_algorithm": 34.712, - "validation": 19.836 + "search_algorithm": 38.55, + "validation": 30.29 }, - "timestamp": "2026-01-27 09:26:17 UTC" + "timestamp": "2026-03-02 14:28:31 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -30156,61 +32400,61 @@ { "name": "time", "unit": "", - "value": 1779.936 + "value": 1764.256 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 59.10739805640244 + "value": 27.937080938697317 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2099992.0 + "value": 252.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870512.0 + "value": 1866392.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.904508279555638 + "value": 3.4911439174028835 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2121344.0 + "value": 28142.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103212.0 + "value": 2097279.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.641422855124741 + "value": 5.930298228550079 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06914398086116218 + "value": 0.06944458572981581 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -30240,13 +32484,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.11007669172562 + "value": 81.66753485015992 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9204331464135 + "value": 99.92222346270778 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -30258,7 +32502,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4445962240.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -30282,7 +32526,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -30300,13 +32544,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 167002112.0 + "value": 165953536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.65641334068437 + "value": 37.636609774164526 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -30318,13 +32562,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.47975643858112 + "value": 94.88880894631812 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.725215006062219 + "value": 3.3706840092014856 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -30336,7 +32580,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.78011624451872 + "value": 58.663946842392235 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -30355,21 +32599,38 @@ "time" ], "times": { - "compilation": 27524.638, - "data": 60782.663, - "framework": 180862.272, - "kernel_overhead": 24454.314, - "profiling_overhead": 50236.251, - "profiling_runs": 45389.044, + "compilation": 30536.323, + "data": 59981.885, + "framework": 184721.823, + "kernel_overhead": 26475.882, + "profiling_overhead": 49960.297, + "profiling_runs": 48303.759, "runtimes": [ - 1779.936 + 1764.256 ], - "search_algorithm": 34.956, - "validation": 20.868 + "search_algorithm": 40.293, + "validation": 28.624 }, - "timestamp": "2026-01-27 09:26:17 UTC" + "timestamp": "2026-03-02 14:28:31 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -30386,61 +32647,61 @@ { "name": "time", "unit": "", - "value": 1743.296 + "value": 1765.28 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 58.8692638623327 + "value": 27.48230092734519 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2102072.0 + "value": 196.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838872.0 + "value": 1835780.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.934920440151891 + "value": 3.4968266214622408 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2131860.0 + "value": 30051.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099878.0 + "value": 2099162.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.657152844642519 + "value": 5.930723117251725 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06930497063506431 + "value": 0.06944810235094939 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -30470,13 +32731,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.53269125028082 + "value": 81.70632592116904 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91977588685602 + "value": 99.91519269361653 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -30488,7 +32749,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4445962240.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -30512,7 +32773,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -30530,13 +32791,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 167002112.0 + "value": 165953536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.74150834839213 + "value": 37.63552430459736 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -30548,13 +32809,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.70035909697086 + "value": 94.90029145743134 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.7339130845119124 + "value": 3.3710918962539695 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -30566,7 +32827,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.91736310451067 + "value": 58.67104577670327 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -30585,21 +32846,38 @@ "time" ], "times": { - "compilation": 28183.933, - "data": 59473.247, - "framework": 178807.112, - "kernel_overhead": 24499.877, - "profiling_overhead": 49653.564, - "profiling_runs": 45180.424, + "compilation": 30140.925, + "data": 59674.57, + "framework": 181421.825, + "kernel_overhead": 25526.685, + "profiling_overhead": 49335.834, + "profiling_runs": 46884.736, "runtimes": [ - 1743.296 + 1765.28 ], - "search_algorithm": 32.887, - "validation": 18.844 + "search_algorithm": 43.845, + "validation": 31.409 }, - "timestamp": "2026-01-27 09:26:17 UTC" + "timestamp": "2026-03-02 14:28:31 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -30616,61 +32894,61 @@ { "name": "time", "unit": "", - "value": 1870.208 + "value": 1755.008 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 58.42777145026575 + "value": 27.64104941244462 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2099992.0 + "value": 268.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839816.0 + "value": 1836336.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.920566362835337 + "value": 3.492123491174811 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2130165.0 + "value": 28241.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103068.0 + "value": 2098524.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.657453500671517 + "value": 5.9296535068530885 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06930733165306673 + "value": 0.06943361427728693 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -30700,13 +32978,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.47596059718435 + "value": 81.66473019100874 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9251701633742 + "value": 99.90422647744198 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -30718,7 +32996,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4445962240.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -30742,7 +33020,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -30760,13 +33038,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 167002112.0 + "value": 165953536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.74225098919493 + "value": 37.636692731024105 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -30778,13 +33056,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.6984728628508 + "value": 94.89090839214354 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.733838712732033 + "value": 3.37075858668381 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -30796,7 +33074,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.916189593220835 + "value": 58.665244801336705 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -30815,21 +33093,38 @@ "time" ], "times": { - "compilation": 27145.469, - "data": 59696.556, - "framework": 179297.17700000003, - "kernel_overhead": 24445.83, - "profiling_overhead": 49816.497, - "profiling_runs": 45338.294, + "compilation": 34962.122, + "data": 60928.002, + "framework": 185682.45299999998, + "kernel_overhead": 26226.474, + "profiling_overhead": 50696.109, + "profiling_runs": 47831.868, "runtimes": [ - 1870.208 + 1755.008 ], - "search_algorithm": 33.692, - "validation": 21.384 + "search_algorithm": 46.789, + "validation": 35.451 }, - "timestamp": "2026-01-27 09:26:18 UTC" + "timestamp": "2026-03-02 14:28:31 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", @@ -30846,61 +33141,61 @@ { "name": "time", "unit": "", - "value": 1747.04 + "value": 1757.824 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 58.885401139013105 + "value": 27.54344197214357 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103840.0 + "value": 4104.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840144.0 + "value": 1838636.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.951710303821272 + "value": 3.5002593192059854 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2133210.0 + "value": 32296.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103694.0 + "value": 2099549.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.681718518939764 + "value": 5.960023644544943 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06950473127665394 + "value": 0.06975992435408203 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -30930,13 +33225,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.0640922590476 + "value": 81.22938882777015 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.82602853483709 + "value": 99.82424407730409 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -30948,7 +33243,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4445962240.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -30972,7 +33267,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -30990,13 +33285,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 167002112.0 + "value": 165953536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.89024463778831 + "value": 37.839212284150626 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -31008,13 +33303,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.06250807450934 + "value": 95.41324447964932 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.748192151863589 + "value": 3.3893132499484806 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -31026,7 +33321,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.142672311476964 + "value": 58.98817325635226 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -31045,21 +33340,38 @@ "time" ], "times": { - "compilation": 27185.912, - "data": 59721.789, - "framework": 179462.00199999998, - "kernel_overhead": 24547.074, - "profiling_overhead": 49692.159, - "profiling_runs": 45500.98, + "compilation": 32434.692, + "data": 60505.393, + "framework": 182748.648, + "kernel_overhead": 25833.474, + "profiling_overhead": 49619.448, + "profiling_runs": 46790.333, "runtimes": [ - 1747.04 + 1757.824 ], - "search_algorithm": 32.269, - "validation": 18.74 + "search_algorithm": 43.547, + "validation": 27.45 }, - "timestamp": "2026-01-27 09:26:18 UTC" + "timestamp": "2026-03-02 14:28:31 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 22 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -31076,61 +33388,61 @@ { "name": "time", "unit": "", - "value": 6438.304 + "value": 6180.544 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.10962911747709 + "value": 7.399205593754115 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2114244.0 + "value": 52.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1882140.0 + "value": 1870524.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9604890635516716 + "value": 0.9981783604196638 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2208767.0 + "value": 104573.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2145634.0 + "value": 2099048.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.32567915597991 + "value": 26.285176122356464 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67633152.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.30603939341911324 + "value": 0.3079667216493864 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -31160,13 +33472,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.15438374295712 + "value": 98.2203318165304 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97009067254479 + "value": 99.97547301904629 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -31178,7 +33490,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6996099072.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -31202,7 +33514,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2181038080.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -31220,13 +33532,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 817102848.0 + "value": 816054272.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.54169364265381 + "value": 43.02314933298777 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -31238,13 +33550,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.123174837668223 + "value": 26.286274143533596 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.418740199817858 + "value": 13.399838967699743 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -31256,7 +33568,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.51785941361392 + "value": 79.91163980436497 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -31275,21 +33587,38 @@ "time" ], "times": { - "compilation": 27366.146, - "data": 60794.494, - "framework": 2163052.159, - "kernel_overhead": 1002881.631, - "profiling_overhead": 50809.513, - "profiling_runs": 1048566.521, + "compilation": 77085.897, + "data": 61252.1, + "framework": 2174059.477, + "kernel_overhead": 1008046.814, + "profiling_overhead": 50967.095, + "profiling_runs": 1053793.468, "runtimes": [ - 6438.304 + 6180.544 ], - "search_algorithm": 28.334, - "validation": 19.28 + "search_algorithm": 31.118, + "validation": 31.169 }, - "timestamp": "2026-01-27 09:26:19 UTC" + "timestamp": "2026-03-02 14:28:33 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 22 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -31306,61 +33635,61 @@ { "name": "time", "unit": "", - "value": 6435.168 + "value": 6324.0 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.00103273306395 + "value": 7.405938952399276 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104008.0 + "value": 432.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869860.0 + "value": 1871960.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9117618329383548 + "value": 0.9970017452403507 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2197068.0 + "value": 105103.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100589.0 + "value": 2099167.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.326201356829586 + "value": 26.284200157662436 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67633152.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3060282084265532 + "value": 0.30794646222443206 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -31390,13 +33719,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.88367669500019 + "value": 97.57323694036057 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96459338553325 + "value": 99.96670650359597 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -31408,7 +33737,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6996099072.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -31432,7 +33761,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2181038080.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -31450,13 +33779,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 817102848.0 + "value": 816054272.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.54267972387353 + "value": 43.02408799604398 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -31468,13 +33797,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.123656621450447 + "value": 26.286849921251466 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.418987678596617 + "value": 13.400132479387954 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -31486,7 +33815,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.51934297412623 + "value": 79.91340782526372 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -31505,21 +33834,38 @@ "time" ], "times": { - "compilation": 17092.087, - "data": 64028.585, - "framework": 2167931.465, - "kernel_overhead": 1002409.582, - "profiling_overhead": 53669.152, - "profiling_runs": 1047824.146, + "compilation": 22365.997, + "data": 62681.924, + "framework": 2168850.27, + "kernel_overhead": 1004207.878, + "profiling_overhead": 52704.575, + "profiling_runs": 1049255.893, "runtimes": [ - 6435.168 + 6324.0 ], - "search_algorithm": 32.984, - "validation": 18.751 + "search_algorithm": 32.056, + "validation": 21.436 }, - "timestamp": "2026-01-27 09:26:20 UTC" + "timestamp": "2026-03-02 14:28:34 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 22 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -31536,61 +33882,61 @@ { "name": "time", "unit": "", - "value": 6591.84 + "value": 6399.52 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.165827656832514 + "value": 7.255536521616275 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2099880.0 + "value": 856.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839716.0 + "value": 1839696.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9050248681758468 + "value": 1.001737794154545 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2205696.0 + "value": 108846.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101222.0 + "value": 2103458.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.325839011182644 + "value": 26.28472107494712 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67633152.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.30607954798965453 + "value": 0.3079475523065852 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -31620,13 +33966,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.3033417062571 + "value": 97.92650124592967 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97954995822947 + "value": 99.96685694654576 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -31638,7 +33984,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6996099072.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -31662,7 +34008,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2181038080.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -31680,13 +34026,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 817102848.0 + "value": 816054272.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.54330941912097 + "value": 43.024290022067085 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -31698,13 +34044,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.124130490714727 + "value": 26.286903412611444 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.419231091910103 + "value": 13.400159747444507 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -31716,7 +34062,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.52076799252309 + "value": 79.91355320677756 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -31735,21 +34081,38 @@ "time" ], "times": { - "compilation": 18608.595, - "data": 63942.851, - "framework": 2175718.323, - "kernel_overhead": 1005740.773, - "profiling_overhead": 53722.275, - "profiling_runs": 1052312.424, + "compilation": 20685.208, + "data": 64288.48, + "framework": 2177427.641, + "kernel_overhead": 1006927.229, + "profiling_overhead": 54241.152, + "profiling_runs": 1051970.78, "runtimes": [ - 6591.84 + 6399.52 ], - "search_algorithm": 29.999, - "validation": 17.152 + "search_algorithm": 35.638, + "validation": 24.978 }, - "timestamp": "2026-01-27 09:26:21 UTC" + "timestamp": "2026-03-02 14:28:35 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 22 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -31766,61 +34129,61 @@ { "name": "time", "unit": "", - "value": 6358.976 + "value": 6322.656 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.010354550168522 + "value": 7.589711935766606 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2108112.0 + "value": 19076.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1844012.0 + "value": 1843024.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9333949107315092 + "value": 1.0068492232907522 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2217382.0 + "value": 128363.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102508.0 + "value": 2107323.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.32570415884508 + "value": 26.284597329485138 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67633152.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.30606952265340703 + "value": 0.3079411469786517 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -31850,13 +34213,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.80189336814355 + "value": 97.62706740611283 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97609425365243 + "value": 99.96556732339205 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -31868,7 +34231,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6996099072.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -31892,7 +34255,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2181038080.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -31910,13 +34273,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 817102848.0 + "value": 816054272.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.54357690408299 + "value": 43.02357381700273 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -31928,13 +34291,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.12417777943274 + "value": 26.286695755118554 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.419255382794551 + "value": 13.400053890792856 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -31946,7 +34309,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.52092935772245 + "value": 79.91293915210245 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -31965,21 +34328,38 @@ "time" ], "times": { - "compilation": 21608.529, - "data": 63647.289, - "framework": 2171150.613, - "kernel_overhead": 1004154.78, - "profiling_overhead": 53396.012, - "profiling_runs": 1049952.532, + "compilation": 22656.135, + "data": 61528.757, + "framework": 2170466.767, + "kernel_overhead": 1006208.213, + "profiling_overhead": 51750.338, + "profiling_runs": 1050979.459, "runtimes": [ - 6358.976 + 6322.656 ], - "search_algorithm": 34.695, - "validation": 16.475 + "search_algorithm": 26.114, + "validation": 23.448 }, - "timestamp": "2026-01-27 09:26:22 UTC" + "timestamp": "2026-03-02 14:28:36 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 128 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 22 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -31996,61 +34376,61 @@ { "name": "time", "unit": "", - "value": 6411.84 + "value": 6448.864 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.573995843824894 + "value": 7.275828897703929 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2102140.0 + "value": 1136.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840508.0 + "value": 1839516.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9422751297632612 + "value": 0.9976251007100965 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2210961.0 + "value": 106946.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108643.0 + "value": 2103557.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.341394927900307 + "value": 26.22199167630891 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67633152.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3061730382223576 + "value": 0.3071864741955581 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -32080,13 +34460,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.37146713690687 + "value": 97.48318972789014 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95552641882381 + "value": 99.9578961898209 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -32098,7 +34478,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6996099072.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -32122,7 +34502,7 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2181038080.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -32140,13 +34520,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 817102848.0 + "value": 816054272.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.56514127052398 + "value": 42.9214955795746 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -32158,13 +34538,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.138390606672452 + "value": 26.224287222133785 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.426556112411825 + "value": 13.368240165970544 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -32176,7 +34556,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.56421677143032 + "value": 79.72322916599319 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -32195,21 +34575,38 @@ "time" ], "times": { - "compilation": 17769.848, - "data": 61664.789, - "framework": 2159080.7430000002, - "kernel_overhead": 1000396.238, - "profiling_overhead": 51374.141, - "profiling_runs": 1045645.575, + "compilation": 17226.48, + "data": 66995.925, + "framework": 2177434.74, + "kernel_overhead": 1004065.802, + "profiling_overhead": 57075.527, + "profiling_runs": 1049297.486, "runtimes": [ - 6411.84 + 6448.864 ], - "search_algorithm": 26.339, - "validation": 17.117 + "search_algorithm": 25.067, + "validation": 19.946 }, - "timestamp": "2026-01-27 09:26:23 UTC" + "timestamp": "2026-03-02 14:28:37 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 25 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -32226,61 +34623,61 @@ { "name": "time", "unit": "", - "value": 6629.952 + "value": 6190.688 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.704606155029035 + "value": 7.9429364577250015 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103268.0 + "value": 6836.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870716.0 + "value": 1873360.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8782740757950338 + "value": 1.0263728912242345 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2201237.0 + "value": 110238.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100744.0 + "value": 2103946.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.863979162711637 + "value": 13.383709423330373 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.14840688595631907 + "value": 0.15680329031695978 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -32310,13 +34707,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.75747187104278 + "value": 98.86851235990034 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97878579317475 + "value": 99.97023869434464 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -32328,7 +34725,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -32340,7 +34737,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10322182144.0 + "value": 10854858752.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -32352,13 +34749,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1929379840.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -32370,13 +34767,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 838467584.0 + "value": 837287936.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.392614032630156 + "value": 36.23178214299484 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -32388,13 +34785,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.333482833989564 + "value": 26.769061703035646 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.605507731128138 + "value": 6.875257058494507 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -32406,7 +34803,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.13042865728826 + "value": 83.49684240887731 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -32425,21 +34822,38 @@ "time" ], "times": { - "compilation": 16306.776, - "data": 62940.6, - "framework": 2755674.253, - "kernel_overhead": 1297324.361, - "profiling_overhead": 52676.519, - "profiling_runs": 1342732.773, + "compilation": 58363.516, + "data": 63899.044, + "framework": 2756193.406, + "kernel_overhead": 1297583.963, + "profiling_overhead": 54000.23, + "profiling_runs": 1340710.169, "runtimes": [ - 6629.952 + 6190.688 ], - "search_algorithm": 24.797, - "validation": 14.054 + "search_algorithm": 25.989, + "validation": 20.236 }, - "timestamp": "2026-01-27 09:26:25 UTC" + "timestamp": "2026-03-02 14:28:39 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 25 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -32456,61 +34870,61 @@ { "name": "time", "unit": "", - "value": 6635.744 + "value": 6239.808 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.829065667358652 + "value": 7.8408930905497165 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2110136.0 + "value": 12228.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871920.0 + "value": 1876672.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.878923365940108 + "value": 1.01942718507223 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2206183.0 + "value": 117160.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102254.0 + "value": 2105444.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.864139505659509 + "value": 13.383295824504549 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.14840805548490232 + "value": 0.15681113930796367 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -32540,13 +34954,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.63391254019487 + "value": 98.52955473846026 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97880374221275 + "value": 99.97337788587753 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -32558,7 +34972,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -32570,7 +34984,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10322182144.0 + "value": 10854858752.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -32582,13 +34996,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1929379840.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -32600,13 +35014,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 838467584.0 + "value": 837287936.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.39299106549274 + "value": 36.23333460408794 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -32618,13 +35032,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.33367792777056 + "value": 26.76956106498927 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.605558600307362 + "value": 6.875385312590017 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -32636,7 +35050,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.13105682337185 + "value": 83.49841834530764 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -32655,21 +35069,38 @@ "time" ], "times": { - "compilation": 16318.298, - "data": 60609.95, - "framework": 2755585.902, - "kernel_overhead": 1299558.953, - "profiling_overhead": 50340.208, - "profiling_runs": 1345076.791, + "compilation": 17146.328, + "data": 62149.082, + "framework": 2753345.402, + "kernel_overhead": 1297373.906, + "profiling_overhead": 52322.688, + "profiling_runs": 1341499.726, "runtimes": [ - 6635.744 + 6239.808 ], - "search_algorithm": 31.231, - "validation": 19.457 + "search_algorithm": 26.309, + "validation": 15.361 }, - "timestamp": "2026-01-27 09:26:26 UTC" + "timestamp": "2026-03-02 14:28:40 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 25 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -32686,61 +35117,61 @@ { "name": "time", "unit": "", - "value": 6582.976 + "value": 6240.8 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.541393426134533 + "value": 7.671476368331014 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2110080.0 + "value": 9264.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1845600.0 + "value": 1838980.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8776953761408255 + "value": 1.0154630356349206 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2222424.0 + "value": 111506.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103404.0 + "value": 2102818.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.864180830307536 + "value": 13.383781650940765 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.14841752889766469 + "value": 0.1567976379293333 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -32770,13 +35201,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.83671576102546 + "value": 98.77267913476544 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97996246729286 + "value": 99.96659393179232 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -32788,7 +35219,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -32800,7 +35231,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10322182144.0 + "value": 10854858752.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -32812,13 +35243,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1929379840.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -32830,13 +35261,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 838467584.0 + "value": 837287936.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.394779357296553 + "value": 36.23236943660888 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -32848,13 +35279,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.335001441068215 + "value": 26.769072701291375 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.605903696059778 + "value": 6.875259883241827 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -32866,7 +35297,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.13517351983182 + "value": 83.49687711302771 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -32885,21 +35316,38 @@ "time" ], "times": { - "compilation": 19062.045, - "data": 62065.318, - "framework": 2757402.683, - "kernel_overhead": 1298737.031, - "profiling_overhead": 51914.423, - "profiling_runs": 1344685.911, + "compilation": 17254.464, + "data": 66885.389, + "framework": 2762605.827, + "kernel_overhead": 1297943.976, + "profiling_overhead": 56353.573, + "profiling_runs": 1341422.889, "runtimes": [ - 6582.976 + 6240.8 ], - "search_algorithm": 24.095, - "validation": 16.462 + "search_algorithm": 23.8, + "validation": 19.089 }, - "timestamp": "2026-01-27 09:26:28 UTC" + "timestamp": "2026-03-02 14:28:42 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 25 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -32916,61 +35364,61 @@ { "name": "time", "unit": "", - "value": 6692.096 + "value": 6278.4 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.672574465486889 + "value": 7.813860799303271 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103472.0 + "value": 18184.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839176.0 + "value": 1842260.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8805231163092049 + "value": 1.0267835816582325 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2210989.0 + "value": 120980.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101075.0 + "value": 2107015.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.864486572967332 + "value": 13.383471766363472 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.148409440123032 + "value": 0.15680471098111826 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -33000,13 +35448,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.71003872087879 + "value": 98.58929895016895 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97718948762238 + "value": 99.97091384986095 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -33018,7 +35466,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -33030,7 +35478,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10322182144.0 + "value": 10854858752.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -33042,13 +35490,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1929379840.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -33060,13 +35508,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 838467584.0 + "value": 837287936.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.39399923498373 + "value": 36.23222970282393 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -33078,13 +35526,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.334323336624614 + "value": 26.76912344821457 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.605726885623802 + "value": 6.875272916875422 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -33096,7 +35544,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.13307279139869 + "value": 83.49705335062431 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -33115,21 +35563,38 @@ "time" ], "times": { - "compilation": 16256.327, - "data": 62786.725, - "framework": 2753235.6550000003, - "kernel_overhead": 1296527.019, - "profiling_overhead": 52051.265, - "profiling_runs": 1341870.646, + "compilation": 15888.808, + "data": 61455.533, + "framework": 2751476.562, + "kernel_overhead": 1297411.834, + "profiling_overhead": 51651.744, + "profiling_runs": 1340957.451, "runtimes": [ - 6692.096 + 6278.4 ], - "search_algorithm": 26.758, - "validation": 14.332 + "search_algorithm": 24.981, + "validation": 20.65 }, - "timestamp": "2026-01-27 09:26:29 UTC" + "timestamp": "2026-03-02 14:28:43 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 25 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -33146,61 +35611,61 @@ { "name": "time", "unit": "", - "value": 6637.472 + "value": 6343.968 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.704194748559056 + "value": 7.720477684463746 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2112852.0 + "value": 14444.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842012.0 + "value": 1842384.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8831714117403198 + "value": 1.0233861854047352 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2219177.0 + "value": 117946.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103576.0 + "value": 2109341.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.850940677052956 + "value": 13.377582364623292 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.14825043404844845 + "value": 0.15673282467868527 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -33230,13 +35695,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.34779429722413 + "value": 98.11190322949062 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97326369611629 + "value": 99.9705158371198 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -33248,7 +35713,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -33260,7 +35725,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10322182144.0 + "value": 10854858752.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -33272,13 +35737,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1929379840.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -33290,13 +35755,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 838467584.0 + "value": 837287936.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.36244940163969 + "value": 36.216003633880455 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -33308,13 +35773,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.3081738812115 + "value": 26.75695781018483 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.598908619417451 + "value": 6.872148343826767 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -33326,7 +35791,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.05141359871571 + "value": 83.45913215432238 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -33345,21 +35810,38 @@ "time" ], "times": { - "compilation": 17609.468, - "data": 60841.479, - "framework": 2750682.1330000004, - "kernel_overhead": 1296888.073, - "profiling_overhead": 50858.424, - "profiling_runs": 1342094.157, + "compilation": 16524.438, + "data": 60943.771, + "framework": 2741024.51, + "kernel_overhead": 1292675.638, + "profiling_overhead": 51229.376, + "profiling_runs": 1336175.725, "runtimes": [ - 6637.472 + 6343.968 ], - "search_algorithm": 27.862, - "validation": 18.176 + "search_algorithm": 25.351, + "validation": 17.333 }, - "timestamp": "2026-01-27 09:26:31 UTC" + "timestamp": "2026-03-02 14:28:44 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 32, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -33376,61 +35858,61 @@ { "name": "time", "unit": "", - "value": 8484.672 + "value": 8130.816 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.69730809524162 + "value": 6.129069302975833 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103780.0 + "value": 23400.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1940816.0 + "value": 1940648.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.202072912304594 + "value": 48.84507229292939 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2944373.0 + "value": 883544.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138460017.0 + "value": 138429299.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.20234285576296 + "value": 5.159954563704993 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.058677990321808524 + "value": 0.05963846220710941 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -33460,13 +35942,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.33979928964085 + "value": 93.26924897031162 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.42438211546141 + "value": 99.02114560957686 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -33478,7 +35960,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -33490,7 +35972,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4112515072.0 + "value": 4250927104.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -33502,13 +35984,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2738880512.0 + "value": 2722103296.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 427819008.0 + "value": 289406976.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -33520,13 +36002,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 421462016.0 + "value": 420413440.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.813513024892146 + "value": 12.356358162282225 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -33538,13 +36020,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.14471058676996 + "value": 20.55782628519487 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.875696366250919 + "value": 13.059439451678967 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -33556,7 +36038,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.628890140959598 + "value": 32.19723358932365 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -33575,21 +36057,38 @@ "time" ], "times": { - "compilation": 16368.786, - "data": 61692.419, - "framework": 934404.505, - "kernel_overhead": 382191.625, - "profiling_overhead": 51541.792, - "profiling_runs": 438978.669, + "compilation": 58757.048, + "data": 62066.507, + "framework": 923555.274, + "kernel_overhead": 377228.799, + "profiling_overhead": 52303.156, + "profiling_runs": 431956.812, "runtimes": [ - 8484.672 + 8130.816 ], - "search_algorithm": 52.091, - "validation": 16.318 + "search_algorithm": 26.471, + "validation": 16.782 }, - "timestamp": "2026-01-27 09:26:31 UTC" + "timestamp": "2026-03-02 14:28:45 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 32, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -33606,61 +36105,61 @@ { "name": "time", "unit": "", - "value": 8325.28 + "value": 8124.256 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.653682004726242 + "value": 6.2003226108098 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098748.0 + "value": 19644.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1928496.0 + "value": 1932412.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.52245116496773 + "value": 48.81044085703491 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 3069218.0 + "value": 922403.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138414916.0 + "value": 138421827.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.228566782588522 + "value": 5.10318603920842 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05921142125095403 + "value": 0.059565077501131575 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -33690,13 +36189,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.64108358250778 + "value": 97.83780089917505 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.34417752912108 + "value": 98.7403364441641 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -33708,7 +36207,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -33720,7 +36219,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4112515072.0 + "value": 4250927104.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -33732,13 +36231,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2738880512.0 + "value": 2722103296.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 427819008.0 + "value": 289406976.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -33750,13 +36249,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 421462016.0 + "value": 420413440.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.811480932041173 + "value": 12.376813518752316 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -33768,13 +36267,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.141509238168688 + "value": 20.59092280409304 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.873650191778715 + "value": 13.080464144592307 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -33786,7 +36285,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.62388491366387 + "value": 32.24908867918433 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -33805,21 +36304,38 @@ "time" ], "times": { - "compilation": 16235.35, - "data": 63129.813, - "framework": 942191.1910000001, - "kernel_overhead": 385124.364, - "profiling_overhead": 52901.143, - "profiling_runs": 441035.871, + "compilation": 15994.163, + "data": 63642.063, + "framework": 935007.602, + "kernel_overhead": 381444.325, + "profiling_overhead": 53753.685, + "profiling_runs": 436167.529, "runtimes": [ - 8325.28 + 8124.256 ], - "search_algorithm": 25.454, - "validation": 16.271 + "search_algorithm": 28.428, + "validation": 18.225 }, - "timestamp": "2026-01-27 09:26:32 UTC" + "timestamp": "2026-03-02 14:28:46 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 32, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -33836,61 +36352,61 @@ { "name": "time", "unit": "", - "value": 8686.112 + "value": 8277.6 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.409733723727276 + "value": 5.947119754208488 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2114208.0 + "value": 12660.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1913932.0 + "value": 1913712.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.00090736700588 + "value": 48.734670222301155 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2646364.0 + "value": 673182.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138420833.0 + "value": 138424438.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.170868742598362 + "value": 5.177296595378048 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05888695602168006 + "value": 0.05989702566520283 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -33920,13 +36436,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.95780347905517 + "value": 93.29228283916612 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.00866937367914 + "value": 98.5858572352346 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -33938,7 +36454,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -33950,7 +36466,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4112515072.0 + "value": 4250927104.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -33962,13 +36478,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2738880512.0 + "value": 2722103296.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 427819008.0 + "value": 289406976.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -33980,13 +36496,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 421462016.0 + "value": 420413440.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.905897199945667 + "value": 12.465168870524554 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -33998,13 +36514,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.30133433353357 + "value": 20.738118022621308 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.975804024704807 + "value": 13.17397048214371 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -34016,7 +36532,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.874804867004364 + "value": 32.479603320892366 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -34035,21 +36551,38 @@ "time" ], "times": { - "compilation": 15963.301, - "data": 63691.752, - "framework": 939418.405, - "kernel_overhead": 383513.6, - "profiling_overhead": 53455.57, - "profiling_runs": 438757.483, + "compilation": 17177.367, + "data": 64184.544, + "framework": 928554.563, + "kernel_overhead": 377422.475, + "profiling_overhead": 54465.22, + "profiling_runs": 432482.324, "runtimes": [ - 8686.112 + 8277.6 ], - "search_algorithm": 24.297, - "validation": 18.561 + "search_algorithm": 27.536, + "validation": 18.017 }, - "timestamp": "2026-01-27 09:26:32 UTC" + "timestamp": "2026-03-02 14:28:46 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 32, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -34066,61 +36599,61 @@ { "name": "time", "unit": "", - "value": 8315.903 + "value": 8272.416 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.804588962128866 + "value": 6.143757356821042 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2113916.0 + "value": 22788.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1909208.0 + "value": 1914668.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.30394277550515 + "value": 48.754066440799235 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2788749.0 + "value": 692430.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138417400.0 + "value": 138419959.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.237576208131443 + "value": 5.139622148189375 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.059412943260542486 + "value": 0.05954356571516568 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -34150,13 +36683,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.91559561741819 + "value": 97.76882272745249 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.7187760759627 + "value": 98.28407741308017 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -34168,7 +36701,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -34180,7 +36713,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4112515072.0 + "value": 4250927104.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -34192,13 +36725,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2738880512.0 + "value": 2722103296.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 427819008.0 + "value": 289406976.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -34210,13 +36743,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 421462016.0 + "value": 420413440.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.92524447586708 + "value": 12.43016247404871 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -34228,13 +36761,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.336809941205836 + "value": 20.67904008366368 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.998478619647674 + "value": 13.13644099064768 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -34246,7 +36779,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.930523499258932 + "value": 32.387096419234176 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -34265,21 +36798,38 @@ "time" ], "times": { - "compilation": 15345.311, - "data": 63234.45, - "framework": 939786.992, - "kernel_overhead": 384740.823, - "profiling_overhead": 53002.033, - "profiling_runs": 438809.686, + "compilation": 17654.453, + "data": 64097.268, + "framework": 935152.174, + "kernel_overhead": 380943.277, + "profiling_overhead": 54290.395, + "profiling_runs": 435821.234, "runtimes": [ - 8315.903 + 8272.416 ], - "search_algorithm": 25.84, - "validation": 16.959 + "search_algorithm": 26.555, + "validation": 16.453 }, - "timestamp": "2026-01-27 09:26:33 UTC" + "timestamp": "2026-03-02 14:28:47 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 32, + "registers": 27 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -34296,61 +36846,61 @@ { "name": "time", "unit": "", - "value": 8305.76 + "value": 8217.536 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.599896681002095 + "value": 5.873440032256297 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2120372.0 + "value": 7872.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1914756.0 + "value": 1912632.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.449618003971274 + "value": 48.81108370121944 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2748911.0 + "value": 621873.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138428102.0 + "value": 138424159.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.305681437919774 + "value": 5.143402181694774 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05915589904618334 + "value": 0.05994123534554356 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -34380,13 +36930,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.5804514676885 + "value": 96.4738915739795 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.72456359526332 + "value": 98.6960128883412 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -34398,7 +36948,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -34410,7 +36960,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4112515072.0 + "value": 4250927104.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -34422,13 +36972,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2738880512.0 + "value": 2722103296.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 427819008.0 + "value": 289406976.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -34440,13 +36990,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 421462016.0 + "value": 420413440.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.11718599234574 + "value": 12.460604963076063 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -34458,13 +37008,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.662031596672804 + "value": 20.730261604143386 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.206347343771826 + "value": 13.168979661616476 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -34476,7 +37026,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 32.44115181140972 + "value": 32.46730880328442 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -34495,21 +37045,38 @@ "time" ], "times": { - "compilation": 16241.515, - "data": 66099.467, - "framework": 940942.9709999999, - "kernel_overhead": 382308.524, - "profiling_overhead": 54672.574, - "profiling_runs": 437862.406, + "compilation": 17742.418, + "data": 64756.71, + "framework": 930839.614, + "kernel_overhead": 378293.681, + "profiling_overhead": 54344.094, + "profiling_runs": 433445.129, "runtimes": [ - 8305.76 + 8217.536 ], - "search_algorithm": 27.04, - "validation": 16.024 + "search_algorithm": 33.693, + "validation": 18.958 }, - "timestamp": "2026-01-27 09:26:33 UTC" + "timestamp": "2026-03-02 14:28:47 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 34 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -34526,61 +37093,61 @@ { "name": "time", "unit": "", - "value": 8382.176 + "value": 8102.208 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.958069590847593 + "value": 6.503675328285718 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098040.0 + "value": 13992.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2020036.0 + "value": 2022264.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.35421988512385 + "value": 54.18675748014839 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 19521636.0 + "value": 17151845.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138414788.0 + "value": 138416998.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.6972015760762176 + "value": 2.5394646986922833 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02970899976702806 + "value": 0.02946643169617346 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -34610,13 +37177,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.72108916250622 + "value": 88.63168229980182 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.85121851747836 + "value": 98.08406460865255 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -34628,7 +37195,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -34640,7 +37207,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3700424704.0 + "value": 3769630720.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -34652,13 +37219,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2470445056.0 + "value": 2453667840.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 144703488.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -34670,13 +37237,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 380665856.0 + "value": 379617280.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.843016300771886 + "value": 10.069531360436164 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -34688,13 +37255,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.311563686533972 + "value": 20.508683836511022 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.697992855599034 + "value": 11.731407770738604 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -34706,7 +37273,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.80396434923364 + "value": 29.00341064024267 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -34725,21 +37292,38 @@ "time" ], "times": { - "compilation": 17332.943, - "data": 62854.273, - "framework": 901366.937, - "kernel_overhead": 365122.954, - "profiling_overhead": 52436.008, - "profiling_runs": 420953.702, + "compilation": 75771.1, + "data": 62824.066, + "framework": 891007.0690000001, + "kernel_overhead": 360350.754, + "profiling_overhead": 53148.894, + "profiling_runs": 414683.355, "runtimes": [ - 8382.176 + 8102.208 ], - "search_algorithm": 27.977, - "validation": 16.262 + "search_algorithm": 30.52, + "validation": 20.753 }, - "timestamp": "2026-01-27 09:26:34 UTC" + "timestamp": "2026-03-02 14:28:48 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 34 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -34756,61 +37340,61 @@ { "name": "time", "unit": "", - "value": 8404.416 + "value": 8379.103 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.598458436143298 + "value": 6.450887062097389 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2109732.0 + "value": 14016.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2020724.0 + "value": 2021500.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.47218814664348 + "value": 54.42673126957164 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 19974717.0 + "value": 18125517.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138418395.0 + "value": 138419265.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.661265779776335 + "value": 2.5488469278593695 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029150844303390547 + "value": 0.02957978862727899 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -34840,13 +37424,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.93756170552965 + "value": 90.02710759722736 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.7135052625838 + "value": 98.10890918956747 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -34858,7 +37442,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -34870,7 +37454,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3700424704.0 + "value": 3769630720.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -34882,13 +37466,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2470445056.0 + "value": 2453667840.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 144703488.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -34900,13 +37484,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 380665856.0 + "value": 379617280.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.869885896735733 + "value": 10.10520755587862 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -34918,13 +37502,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.365976696503576 + "value": 20.582366952905012 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.72933081715135 + "value": 11.77355609635167 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -34936,7 +37520,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.881146921729446 + "value": 29.107634573462356 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -34955,21 +37539,38 @@ "time" ], "times": { - "compilation": 16595.887, - "data": 64227.387, - "framework": 904265.9639999999, - "kernel_overhead": 365326.182, - "profiling_overhead": 53941.799, - "profiling_runs": 420770.596, + "compilation": 18614.155, + "data": 64504.736, + "framework": 898370.9469999999, + "kernel_overhead": 362071.492, + "profiling_overhead": 54579.036, + "profiling_runs": 417215.683, "runtimes": [ - 8404.416 + 8379.103 ], - "search_algorithm": 27.969, - "validation": 17.114 + "search_algorithm": 30.304, + "validation": 20.007 }, - "timestamp": "2026-01-27 09:26:34 UTC" + "timestamp": "2026-03-02 14:28:48 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 34 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -34986,61 +37587,61 @@ { "name": "time", "unit": "", - "value": 8325.663 + "value": 8251.296 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.663521731090055 + "value": 6.051920271911973 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2116572.0 + "value": 3104.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2003056.0 + "value": 1999092.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 53.957994109752505 + "value": 53.63251171735445 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 16618676.0 + "value": 14939468.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138429918.0 + "value": 138423017.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.719859342235066 + "value": 2.5689191858103726 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029423810529050076 + "value": 0.02981739755850645 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -35070,13 +37671,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.29215814871625 + "value": 89.22183644857643 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.94727309364454 + "value": 98.80911096449381 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -35088,7 +37689,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -35100,7 +37701,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3700424704.0 + "value": 3769630720.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -35112,13 +37713,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2470445056.0 + "value": 2453667840.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 144703488.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -35130,13 +37731,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 380665856.0 + "value": 379617280.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.938282317708483 + "value": 10.114373912907315 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -35148,13 +37749,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.50762008993749 + "value": 20.600674574690707 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.810907175820933 + "value": 11.7840284493409 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -35166,7 +37767,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.081992788146117 + "value": 29.13350637885929 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -35185,21 +37786,38 @@ "time" ], "times": { - "compilation": 18570.168, - "data": 62424.495, - "framework": 898649.299, - "kernel_overhead": 364215.53, - "profiling_overhead": 52063.207, - "profiling_runs": 419946.067, + "compilation": 19131.648, + "data": 64510.979, + "framework": 894893.003, + "kernel_overhead": 360205.968, + "profiling_overhead": 54585.181, + "profiling_runs": 415590.875, "runtimes": [ - 8325.663 + 8251.296 ], - "search_algorithm": 35.47, - "validation": 16.181 + "search_algorithm": 28.181, + "validation": 21.741 }, - "timestamp": "2026-01-27 09:26:35 UTC" + "timestamp": "2026-03-02 14:28:49 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 34 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -35216,61 +37834,61 @@ { "name": "time", "unit": "", - "value": 8582.88 + "value": 8137.632 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.349013430593837 + "value": 5.990619072385731 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2102308.0 + "value": 5624.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1998096.0 + "value": 1999464.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.309248608013974 + "value": 53.92644022258126 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 17416864.0 + "value": 16040746.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138420535.0 + "value": 138416525.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.745770566741575 + "value": 2.5613219244462293 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.030227547903332853 + "value": 0.03018060913546997 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -35300,13 +37918,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.52211884400522 + "value": 90.51499255124493 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.63692655785765 + "value": 101.1835186611363 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -35318,7 +37936,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -35330,7 +37948,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3700424704.0 + "value": 3769630720.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -35342,13 +37960,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2470445056.0 + "value": 2453667840.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 144703488.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -35360,13 +37978,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 380665856.0 + "value": 379617280.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.93720146204321 + "value": 9.996726592689113 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -35378,13 +37996,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.504739238843573 + "value": 20.362304166829073 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.80924801377734 + "value": 11.647675454804814 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -35396,7 +38014,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.077927141616577 + "value": 28.796421233666546 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -35415,21 +38033,38 @@ "time" ], "times": { - "compilation": 16932.544, - "data": 63516.244, - "framework": 899435.8759999999, - "kernel_overhead": 363102.23, - "profiling_overhead": 53315.623, - "profiling_runs": 419501.779, + "compilation": 19115.332, + "data": 64693.932, + "framework": 899171.416, + "kernel_overhead": 362456.244, + "profiling_overhead": 54775.383, + "profiling_runs": 417245.857, "runtimes": [ - 8582.88 + 8137.632 ], - "search_algorithm": 25.856, - "validation": 13.608 + "search_algorithm": 34.914, + "validation": 18.656 }, - "timestamp": "2026-01-27 09:26:36 UTC" + "timestamp": "2026-03-02 14:28:49 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 64, + "registers": 34 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -35446,61 +38081,61 @@ { "name": "time", "unit": "", - "value": 8268.384 + "value": 8240.127 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.75159414667144 + "value": 6.311145868323815 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104456.0 + "value": 18120.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1997436.0 + "value": 2001996.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 52.177182552034694 + "value": 51.711631834122606 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 10434381.0 + "value": 8383106.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415821.0 + "value": 138421377.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.722752516003285 + "value": 2.601184825751652 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02988937937773416 + "value": 0.029857958924701375 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -35530,13 +38165,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.4565038130171 + "value": 85.39645880845715 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.44669560724635 + "value": 98.04594490015953 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -35548,7 +38183,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -35560,7 +38195,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3700424704.0 + "value": 3769630720.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -35572,13 +38207,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2470445056.0 + "value": 2453667840.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 213909504.0 + "value": 144703488.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -35590,13 +38225,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 380665856.0 + "value": 379617280.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.942640291463281 + "value": 10.20537951789363 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -35608,13 +38243,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.51801003938675 + "value": 20.789267025119948 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.816891035867513 + "value": 11.891907382777353 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -35626,7 +38261,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.09674901104483 + "value": 29.400224806380564 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -35645,21 +38280,38 @@ "time" ], "times": { - "compilation": 15897.435, - "data": 63130.672, - "framework": 897794.649, - "kernel_overhead": 363688.885, - "profiling_overhead": 52831.273, - "profiling_runs": 418143.819, + "compilation": 18600.939, + "data": 64595.093, + "framework": 895366.896, + "kernel_overhead": 360830.178, + "profiling_overhead": 54752.841, + "profiling_runs": 415188.784, "runtimes": [ - 8268.384 + 8240.127 ], - "search_algorithm": 29.739, - "validation": 19.874 + "search_algorithm": 28.138, + "validation": 18.241 }, - "timestamp": "2026-01-27 09:26:36 UTC" + "timestamp": "2026-03-02 14:28:50 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -35676,61 +38328,61 @@ { "name": "time", "unit": "", - "value": 9877.856 + "value": 9253.984 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.543212515321635 + "value": 5.775040268499445 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2122496.0 + "value": 6704.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2241156.0 + "value": 2297088.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 75.23389293331387 + "value": 75.45248369937089 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 120374550.0 + "value": 111326794.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138424177.0 + "value": 138415637.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2234255362234265 + "value": 1.116670363339105 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01236474898613687 + "value": 0.012990189087805623 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -35760,13 +38412,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.53290476545217 + "value": 91.98073269676199 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 95.79233833241638 + "value": 98.77652951177926 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -35778,7 +38430,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -35790,7 +38442,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3494379520.0 + "value": 3528982528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -35802,13 +38454,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2336227328.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -35820,13 +38472,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 360267776.0 + "value": 359219200.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.64364782154687 + "value": 7.828323321331658 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -35838,13 +38490,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.623543012895944 + "value": 17.955619878069218 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.5926975457157 + "value": 9.703311669947807 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -35856,7 +38508,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.652874541946392 + "value": 24.028438400047456 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -35875,21 +38527,38 @@ "time" ], "times": { - "compilation": 17589.849, - "data": 62729.402, - "framework": 887414.433, - "kernel_overhead": 353829.081, - "profiling_overhead": 52056.329, - "profiling_runs": 418799.621, + "compilation": 73503.121, + "data": 66346.394, + "framework": 887508.971, + "kernel_overhead": 350859.814, + "profiling_overhead": 55838.638, + "profiling_runs": 414464.125, "runtimes": [ - 9877.856 + 9253.984 ], - "search_algorithm": 27.59, - "validation": 17.035 + "search_algorithm": 43.092, + "validation": 36.843 }, - "timestamp": "2026-01-27 09:26:37 UTC" + "timestamp": "2026-03-02 14:28:50 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -35906,61 +38575,61 @@ { "name": "time", "unit": "", - "value": 9989.024 + "value": 9582.336 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.235121503935137 + "value": 6.200406648411306 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2119092.0 + "value": 26620.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2237072.0 + "value": 2297844.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 76.86252004810848 + "value": 77.21577443380141 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 125909496.0 + "value": 121642075.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138422342.0 + "value": 138426378.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2278683548857303 + "value": 1.1045934637214303 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012395668528928906 + "value": 0.01263563591130812 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -35990,13 +38659,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.65838606697619 + "value": 95.25250065012403 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 96.87663293362884 + "value": 96.8756928108476 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -36008,7 +38677,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -36020,7 +38689,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3494379520.0 + "value": 3528982528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -36032,13 +38701,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2336227328.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -36050,13 +38719,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 360267776.0 + "value": 359219200.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.578614411845605 + "value": 7.764100576795764 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -36068,13 +38737,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.469867520160978 + "value": 17.808238988553505 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.509050204150123 + "value": 9.623666260049605 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -36086,7 +38755,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.446638668124685 + "value": 23.83122973332753 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -36105,21 +38774,38 @@ "time" ], "times": { - "compilation": 17014.591, - "data": 62021.511, - "framework": 888373.475, - "kernel_overhead": 354811.181, - "profiling_overhead": 51763.772, - "profiling_runs": 419777.011, + "compilation": 21280.612, + "data": 64163.99, + "framework": 887605.735, + "kernel_overhead": 352272.434, + "profiling_overhead": 54421.129, + "profiling_runs": 416748.182, "runtimes": [ - 9989.024 + 9582.336 ], - "search_algorithm": 25.109, - "validation": 17.256 + "search_algorithm": 32.109, + "validation": 24.724 }, - "timestamp": "2026-01-27 09:26:37 UTC" + "timestamp": "2026-03-02 14:28:51 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -36136,61 +38822,61 @@ { "name": "time", "unit": "", - "value": 9547.712 + "value": 9353.088 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.52512556294973 + "value": 5.676980173588034 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2105336.0 + "value": 6780.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2178380.0 + "value": 2201796.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 75.15960045961947 + "value": 73.2189469175961 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 112149281.0 + "value": 100375148.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138418778.0 + "value": 138419868.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2554828990239648 + "value": 1.1345222952271643 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012985112517775274 + "value": 0.013213699741943585 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -36220,13 +38906,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.79451362107109 + "value": 88.51071675154098 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.81126669170274 + "value": 98.50990661598375 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -36238,7 +38924,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -36250,7 +38936,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3494379520.0 + "value": 3528982528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -36262,13 +38948,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2336227328.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -36280,13 +38966,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 360267776.0 + "value": 359219200.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.705538846611963 + "value": 7.985234542270512 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -36298,13 +38984,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.76253076956122 + "value": 18.314000422985252 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.668350183285337 + "value": 9.896982406317836 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -36316,7 +39002,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.839412513843687 + "value": 24.508026974850655 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -36335,21 +39021,38 @@ "time" ], "times": { - "compilation": 15674.127, - "data": 64101.311, - "framework": 890274.67, - "kernel_overhead": 354652.954, - "profiling_overhead": 53846.121, - "profiling_runs": 417674.284, + "compilation": 21022.224, + "data": 63041.019, + "framework": 882059.7279999999, + "kernel_overhead": 352114.126, + "profiling_overhead": 53094.882, + "profiling_runs": 413809.701, "runtimes": [ - 9547.712 + 9353.088 ], - "search_algorithm": 24.356, - "validation": 15.265 + "search_algorithm": 29.869, + "validation": 21.326 }, - "timestamp": "2026-01-27 09:26:38 UTC" + "timestamp": "2026-03-02 14:28:51 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -36366,61 +39069,61 @@ { "name": "time", "unit": "", - "value": 9812.0 + "value": 9566.048 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.565723711620038 + "value": 5.689490571237353 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2110736.0 + "value": 16064.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2174140.0 + "value": 2203744.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 76.98080504913275 + "value": 76.41344404854513 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 123083830.0 + "value": 118661340.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138417246.0 + "value": 138417059.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2281542243552246 + "value": 1.098209084603213 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012695353785663993 + "value": 0.012625767564057892 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -36450,13 +39153,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.10119402100007 + "value": 94.46761822117791 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.49302783784859 + "value": 97.72916732682745 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -36468,7 +39171,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -36480,7 +39183,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3494379520.0 + "value": 3528982528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -36492,13 +39195,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2336227328.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -36510,13 +39213,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 360267776.0 + "value": 359219200.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.631432320114276 + "value": 7.686837732407105 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -36528,13 +39231,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.598595639239505 + "value": 17.638931943907977 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.579118402754998 + "value": 9.532171840293042 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -36546,7 +39249,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.61940710446104 + "value": 23.604660723387145 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -36565,21 +39268,38 @@ "time" ], "times": { - "compilation": 15543.508, - "data": 61781.74, - "framework": 887394.0250000001, - "kernel_overhead": 355005.602, - "profiling_overhead": 51532.81, - "profiling_runs": 419073.873, + "compilation": 21113.852, + "data": 63087.309, + "framework": 885094.192, + "kernel_overhead": 353008.009, + "profiling_overhead": 53220.388, + "profiling_runs": 415778.486, "runtimes": [ - 9812.0 + 9566.048 ], - "search_algorithm": 34.638, - "validation": 19.786 + "search_algorithm": 29.353, + "validation": 21.363 }, - "timestamp": "2026-01-27 09:26:38 UTC" + "timestamp": "2026-03-02 14:28:52 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", @@ -36596,61 +39316,61 @@ { "name": "time", "unit": "", - "value": 8795.232 + "value": 8775.232 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.574023045156856 + "value": 6.682030135315506 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2109160.0 + "value": 24748.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2201368.0 + "value": 2205224.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 61.138371135898886 + "value": 61.384258458295484 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 47461292.0 + "value": 46639972.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138417946.0 + "value": 138425882.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.3543240192731203 + "value": 1.2208052445019124 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.013939473869714334 + "value": 0.014113423733518345 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -36680,13 +39400,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.82328961472544 + "value": 75.99503397902264 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.14499130473135 + "value": 97.61927959264396 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -36698,7 +39418,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -36710,7 +39430,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3494379520.0 + "value": 3528982528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -36722,13 +39442,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2336227328.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -36740,13 +39460,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 360267776.0 + "value": 359219200.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.49846913352596 + "value": 8.604963210496246 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -36758,13 +39478,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.591363453571113 + "value": 19.739469448289622 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.663804887631054 + "value": 10.66731338471413 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -36776,7 +39496,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.293940486785722 + "value": 26.41562088811541 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -36795,21 +39515,38 @@ "time" ], "times": { - "compilation": 14933.783, - "data": 61425.48, - "framework": 879562.916, - "kernel_overhead": 354768.44, - "profiling_overhead": 51278.529, - "profiling_runs": 412090.467, + "compilation": 20048.823, + "data": 63328.07, + "framework": 877791.0549999999, + "kernel_overhead": 351603.916, + "profiling_overhead": 53231.508, + "profiling_runs": 409627.561, "runtimes": [ - 8795.232 + 8775.232 ], - "search_algorithm": 25.736, - "validation": 18.537 + "search_algorithm": 34.271, + "validation": 20.189 }, - "timestamp": "2026-01-27 09:26:39 UTC" + "timestamp": "2026-03-02 14:28:52 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 26 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -36826,61 +39563,61 @@ { "name": "time", "unit": "", - "value": 4009.472 + "value": 3939.648 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 25.848521277655966 + "value": 12.36165719728759 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2109932.0 + "value": 12060.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1873900.0 + "value": 1873336.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.0616242797158013 + "value": 1.5694895026912192 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2164583.0 + "value": 80616.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106412.0 + "value": 2101164.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.377761939015762 + "value": 20.894371321002005 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.24656317955740825 + "value": 0.24478427763015234 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -36910,13 +39647,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.31239162124609 + "value": 98.5293467870994 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95478866156505 + "value": 99.96006543118801 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -36928,7 +39665,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -36940,7 +39677,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5477761024.0 + "value": 5221908480.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -36952,13 +39689,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1933574144.0 + "value": 2193620992.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -36970,13 +39707,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 536215552.0 + "value": 535298048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.530676447895175 + "value": 39.61045113899097 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -36988,13 +39725,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.09914956678655 + "value": 41.79320665241534 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.977024349933604 + "value": 10.733997411704332 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -37006,7 +39743,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.09626922984211 + "value": 83.34220133222632 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -37025,21 +39762,38 @@ "time" ], "times": { - "compilation": 14967.04, - "data": 61628.594, - "framework": 1314441.528, - "kernel_overhead": 584784.8, - "profiling_overhead": 51360.264, - "profiling_runs": 616667.87, + "compilation": 67937.636, + "data": 61139.095, + "framework": 1378305.634, + "kernel_overhead": 616833.811, + "profiling_overhead": 51257.155, + "profiling_runs": 649075.573, "runtimes": [ - 4009.472 + 3939.648 ], - "search_algorithm": 38.007, - "validation": 17.312 + "search_algorithm": 26.63, + "validation": 23.172 }, - "timestamp": "2026-01-27 09:26:39 UTC" + "timestamp": "2026-03-02 14:28:53 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 26 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -37056,61 +39810,61 @@ { "name": "time", "unit": "", - "value": 4073.567 + "value": 4009.696 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 25.24782280821251 + "value": 12.274245526872335 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2113076.0 + "value": 1076.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872412.0 + "value": 1870988.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.0944886500878797 + "value": 1.5602848653773602 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2170105.0 + "value": 68623.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101362.0 + "value": 2100075.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.37707977457399 + "value": 20.895342893181475 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2466065757137678 + "value": 0.24478554546135017 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -37140,13 +39894,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.00113438167384 + "value": 98.48662080554776 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96856427178862 + "value": 99.96157359475532 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -37158,7 +39912,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -37170,7 +39924,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5477761024.0 + "value": 5221908480.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -37182,13 +39936,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1933574144.0 + "value": 2193620992.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -37200,13 +39954,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 536215552.0 + "value": 535298048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.532685955222945 + "value": 39.60914710688622 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -37218,13 +39972,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.10075693467462 + "value": 41.79279255990257 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.977443458552855 + "value": 10.7338910578656 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -37236,7 +39990,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.09950830081979 + "value": 83.34140359113592 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -37255,21 +40009,38 @@ "time" ], "times": { - "compilation": 14523.727, - "data": 62057.871, - "framework": 1313678.7340000002, - "kernel_overhead": 583735.826, - "profiling_overhead": 51582.514, - "profiling_runs": 616302.523, + "compilation": 17136.205, + "data": 66282.431, + "framework": 1391796.605, + "kernel_overhead": 618658.069, + "profiling_overhead": 56395.443, + "profiling_runs": 650460.662, "runtimes": [ - 4073.567 + 4009.696 ], - "search_algorithm": 29.419, - "validation": 17.523 + "search_algorithm": 26.521, + "validation": 20.832 }, - "timestamp": "2026-01-27 09:26:40 UTC" + "timestamp": "2026-03-02 14:28:54 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 26 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -37286,61 +40057,61 @@ { "name": "time", "unit": "", - "value": 3999.392 + "value": 4080.224 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 25.989545462231767 + "value": 12.029993186199562 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097388.0 + "value": 11852.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837188.0 + "value": 1841252.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.0886645812046156 + "value": 1.5653363072438502 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2162935.0 + "value": 78818.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099195.0 + "value": 2104938.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.379763894424592 + "value": 20.894731209843474 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.24660886470363963 + "value": 0.24477825561130695 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -37370,13 +40141,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.48363668246468 + "value": 98.5853839145707 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95837170917875 + "value": 99.95773420503716 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -37388,7 +40159,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -37400,7 +40171,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5477761024.0 + "value": 5221908480.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -37412,13 +40183,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1933574144.0 + "value": 2193620992.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -37430,13 +40201,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 536215552.0 + "value": 535298048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.53684523707941 + "value": 39.60979916505507 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -37448,13 +40219,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.10544068472097 + "value": 41.79315316608874 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.978664709785644 + "value": 10.733983674493496 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -37466,7 +40237,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.10883559137594 + "value": 83.34209218104593 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -37485,21 +40256,38 @@ "time" ], "times": { - "compilation": 14217.079, - "data": 61176.929, - "framework": 1322227.118, - "kernel_overhead": 589299.283, - "profiling_overhead": 50834.864, - "profiling_runs": 620916.042, + "compilation": 17107.172, + "data": 65551.825, + "framework": 1389663.93, + "kernel_overhead": 618156.522, + "profiling_overhead": 55822.607, + "profiling_runs": 650132.976, "runtimes": [ - 3999.392 + 4080.224 ], - "search_algorithm": 21.816, - "validation": 15.118 + "search_algorithm": 31.347, + "validation": 20.618 }, - "timestamp": "2026-01-27 09:26:41 UTC" + "timestamp": "2026-03-02 14:28:55 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 26 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -37516,61 +40304,61 @@ { "name": "time", "unit": "", - "value": 3994.912 + "value": 4055.552 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 25.383935796026886 + "value": 12.009903602295859 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098716.0 + "value": 5352.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836452.0 + "value": 1837340.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.1018743558087447 + "value": 1.5641518669563421 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2164070.0 + "value": 71455.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100424.0 + "value": 2100689.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.37951951426517 + "value": 20.894594591144457 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2465971851136014 + "value": 0.24476231210152738 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -37600,13 +40388,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.25322994951983 + "value": 98.397416805066 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95469666619691 + "value": 99.95374370385154 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -37618,7 +40406,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -37630,7 +40418,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5477761024.0 + "value": 5221908480.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -37642,13 +40430,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1933574144.0 + "value": 2193620992.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -37660,13 +40448,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 536215552.0 + "value": 535298048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.536565981077764 + "value": 39.60823188700584 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -37678,13 +40466,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.104994558954125 + "value": 41.79209940926341 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.978548385977296 + "value": 10.733713031871364 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -37696,7 +40484,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.10797328207471 + "value": 83.34002133970858 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -37715,21 +40503,38 @@ "time" ], "times": { - "compilation": 14548.558, - "data": 61868.308, - "framework": 1312452.302, - "kernel_overhead": 583586.022, - "profiling_overhead": 51562.535, - "profiling_runs": 615435.437, + "compilation": 16204.943, + "data": 64730.565, + "framework": 1386203.366, + "kernel_overhead": 617024.109, + "profiling_overhead": 54823.11, + "profiling_runs": 649625.582, "runtimes": [ - 3994.912 + 4055.552 ], - "search_algorithm": 25.687, - "validation": 15.112 + "search_algorithm": 23.247, + "validation": 18.273 }, - "timestamp": "2026-01-27 09:26:42 UTC" + "timestamp": "2026-03-02 14:28:55 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 64 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 26 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -37746,61 +40551,61 @@ { "name": "time", "unit": "", - "value": 3997.184 + "value": 4199.616 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 25.730564526897336 + "value": 11.645153801862005 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2105748.0 + "value": 484.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840736.0 + "value": 1837520.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.0748507787101924 + "value": 1.5575190926150668 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2173142.0 + "value": 65667.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108563.0 + "value": 2099173.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.271784923590474 + "value": 20.924264771217338 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 34078720.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.24536633425332396 + "value": 0.24514210706204476 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -37830,13 +40635,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.81580167448942 + "value": 98.09200468178501 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96151455854228 + "value": 99.96158869262082 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -37848,7 +40653,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5653921792.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -37860,7 +40665,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5477761024.0 + "value": 5221908480.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -37872,13 +40677,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1107296256.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1933574144.0 + "value": 2193620992.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -37890,13 +40695,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 536215552.0 + "value": 535298048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.32203061158044 + "value": 39.66725057599654 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -37908,13 +40713,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.89197669139901 + "value": 41.85366281099409 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.92300564121439 + "value": 10.749524725870552 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -37926,7 +40731,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.68245179344001 + "value": 83.46282150289375 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -37945,21 +40750,38 @@ "time" ], "times": { - "compilation": 17351.073, - "data": 61510.676, - "framework": 1320798.738, - "kernel_overhead": 588151.352, - "profiling_overhead": 51213.737, - "profiling_runs": 619922.973, + "compilation": 16186.575, + "data": 65553.243, + "framework": 1389465.913, + "kernel_overhead": 617980.502, + "profiling_overhead": 55676.053, + "profiling_runs": 650256.115, "runtimes": [ - 3997.184 + 4199.616 ], - "search_algorithm": 30.829, - "validation": 20.533 + "search_algorithm": 24.335, + "validation": 19.022 }, - "timestamp": "2026-01-27 09:26:42 UTC" + "timestamp": "2026-03-02 14:28:56 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 30 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -37976,61 +40798,61 @@ { "name": "time", "unit": "", - "value": 4761.248 + "value": 4191.167 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 21.86783358029918 + "value": 11.622128975265017 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098036.0 + "value": 12044.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868824.0 + "value": 1874036.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.5819755090413894 + "value": 1.480133420005051 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2165986.0 + "value": 86959.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103038.0 + "value": 2102320.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.004766282090744 + "value": 9.818284256403933 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.10230745012293255 + "value": 0.11502842432121015 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -38060,13 +40882,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 99.02932004969921 + "value": 98.9881837599281 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97736110471065 + "value": 99.96947760708863 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -38078,7 +40900,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -38090,7 +40912,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5460983808.0 + "value": 5863636992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -38102,13 +40924,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1774190592.0 + "value": 1373634560.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -38120,13 +40942,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 622460928.0 + "value": 621477888.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.9870576086791 + "value": 43.263543915002835 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -38138,13 +40960,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 34.92885048118052 + "value": 39.27502317853031 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.690153262853829 + "value": 5.120327728841597 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -38156,7 +40978,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 80.99514343873363 + "value": 90.92948541967701 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -38175,21 +40997,38 @@ "time" ], "times": { - "compilation": 18300.148, - "data": 61104.089, - "framework": 1936693.9, - "kernel_overhead": 894830.157, - "profiling_overhead": 50558.534, - "profiling_runs": 930201.12, + "compilation": 59256.458, + "data": 63798.898, + "framework": 1889481.542, + "kernel_overhead": 869355.885, + "profiling_overhead": 54027.056, + "profiling_runs": 902299.703, "runtimes": [ - 4761.248 + 4191.167 ], - "search_algorithm": 26.659, - "validation": 16.685 + "search_algorithm": 30.036, + "validation": 16.935 }, - "timestamp": "2026-01-27 09:26:43 UTC" + "timestamp": "2026-03-02 14:28:57 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 30 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -38206,61 +41045,61 @@ { "name": "time", "unit": "", - "value": 4848.928 + "value": 4325.248 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 21.67117196435047 + "value": 11.424446392359016 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2107760.0 + "value": 13876.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869916.0 + "value": 1875424.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.5733276875189075 + "value": 1.4852132960123188 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2173412.0 + "value": 86693.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101290.0 + "value": 2106049.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.004817507291069 + "value": 9.81811205748006 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.10230176898890211 + "value": 0.11501900322105627 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -38290,13 +41129,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.87764828547452 + "value": 98.80566035001763 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97216982172743 + "value": 99.95910205334381 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -38308,7 +41147,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -38320,7 +41159,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5460983808.0 + "value": 5863636992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -38332,13 +41171,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1774190592.0 + "value": 1373634560.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -38350,13 +41189,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 622460928.0 + "value": 621477888.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.98690323976678 + "value": 43.2645440877433 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -38368,13 +41207,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 34.92872454118673 + "value": 39.27588279571508 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.690136351965992 + "value": 5.120439798074183 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -38386,7 +41225,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 80.99487586390642 + "value": 90.93150369662779 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -38405,21 +41244,38 @@ "time" ], "times": { - "compilation": 18000.121, - "data": 64139.019, - "framework": 1941409.074, - "kernel_overhead": 893566.658, - "profiling_overhead": 54021.405, - "profiling_runs": 929681.992, + "compilation": 20099.09, + "data": 65448.24, + "framework": 1888469.4810000001, + "kernel_overhead": 867420.456, + "profiling_overhead": 54804.516, + "profiling_runs": 900796.269, "runtimes": [ - 4848.928 + 4325.248 ], - "search_algorithm": 28.715, - "validation": 17.248 + "search_algorithm": 34.629, + "validation": 21.03 }, - "timestamp": "2026-01-27 09:26:44 UTC" + "timestamp": "2026-03-02 14:28:58 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 30 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -38436,61 +41292,61 @@ { "name": "time", "unit": "", - "value": 4846.016 + "value": 4395.488 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 21.471831845445667 + "value": 11.300228871317394 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2102104.0 + "value": 13456.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838544.0 + "value": 1841416.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.5777456164700485 + "value": 1.474778771877703 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2180920.0 + "value": 85713.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100904.0 + "value": 2105119.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.005602455223926 + "value": 9.818107460994959 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.10231273576245593 + "value": 0.11502664226906152 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -38520,13 +41376,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 99.03634631257495 + "value": 98.9198127511204 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97531870733587 + "value": 99.96708520190391 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -38538,7 +41394,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -38550,7 +41406,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5460983808.0 + "value": 5863636992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -38562,13 +41418,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1774190592.0 + "value": 1373634560.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -38580,13 +41436,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 622460928.0 + "value": 621477888.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.98946285313594 + "value": 43.264464575275596 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -38598,13 +41454,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 34.931368653580606 + "value": 39.275354631518155 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.690491396354817 + "value": 5.12037094073015 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -38616,7 +41472,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.00098325060928 + "value": 90.93025455497249 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -38635,21 +41491,38 @@ "time" ], "times": { - "compilation": 19116.588, - "data": 61731.469, - "framework": 1935931.361, - "kernel_overhead": 893564.107, - "profiling_overhead": 51534.454, - "profiling_runs": 929101.331, + "compilation": 18875.479, + "data": 65213.848, + "framework": 1892616.432, + "kernel_overhead": 869480.629, + "profiling_overhead": 54984.999, + "profiling_runs": 902936.956, "runtimes": [ - 4846.016 + 4395.488 ], - "search_algorithm": 24.558, - "validation": 17.093 + "search_algorithm": 38.983, + "validation": 22.538 }, - "timestamp": "2026-01-27 09:26:45 UTC" + "timestamp": "2026-03-02 14:28:59 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 30 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -38666,61 +41539,61 @@ { "name": "time", "unit": "", - "value": 4881.568 + "value": 4527.808 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 21.379863947933256 + "value": 11.268009055982436 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2110472.0 + "value": 780.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842032.0 + "value": 1838736.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.5665485530134613 + "value": 1.4714708020314364 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2191152.0 + "value": 71335.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109009.0 + "value": 2099900.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.005753582900875 + "value": 9.818250988273213 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.10231155592288615 + "value": 0.11502039206340572 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -38750,13 +41623,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.9856794037345 + "value": 98.8290860239727 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97025618104216 + "value": 99.95954486141625 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -38768,7 +41641,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -38780,7 +41653,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5460983808.0 + "value": 5863636992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -38792,13 +41665,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1774190592.0 + "value": 1373634560.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -38810,13 +41683,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 622460928.0 + "value": 621477888.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.99043280774745 + "value": 43.265512485384086 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -38828,13 +41701,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 34.932734751059854 + "value": 39.27618305859592 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.690674832295635 + "value": 5.120478943674371 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -38846,7 +41719,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.0041749853352 + "value": 90.93219886509544 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -38865,21 +41738,38 @@ "time" ], "times": { - "compilation": 16520.803, - "data": 60652.393, - "framework": 1929983.3020000001, - "kernel_overhead": 891601.214, - "profiling_overhead": 50470.356, - "profiling_runs": 927259.339, + "compilation": 16962.238, + "data": 66009.275, + "framework": 1889435.213, + "kernel_overhead": 867033.188, + "profiling_overhead": 55988.89, + "profiling_runs": 900403.86, "runtimes": [ - 4881.568 + 4527.808 ], - "search_algorithm": 31.139, - "validation": 17.277 + "search_algorithm": 25.943, + "validation": 18.136 }, - "timestamp": "2026-01-27 09:26:46 UTC" + "timestamp": "2026-03-02 14:29:0 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 30 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -38896,61 +41786,61 @@ { "name": "time", "unit": "", - "value": 4847.808 + "value": 4415.648 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 20.989200008162488 + "value": 11.294321443696766 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2107172.0 + "value": 5780.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842532.0 + "value": 1837540.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.571327401354967 + "value": 1.4724987501401523 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2188377.0 + "value": 74154.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102707.0 + "value": 2100396.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.994728403610889 + "value": 9.820197007110334 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.10218675327703637 + "value": 0.11505090122269672 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -38980,13 +41870,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.6465654128516 + "value": 98.78063824314737 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95930380638171 + "value": 99.96686818090743 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -38998,7 +41888,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -39010,7 +41900,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5460983808.0 + "value": 5863636992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -39022,13 +41912,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1774190592.0 + "value": 1373634560.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -39040,13 +41930,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 622460928.0 + "value": 621477888.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.95483904499311 + "value": 43.27317453608271 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -39058,13 +41948,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 34.89394562623486 + "value": 39.28372302939407 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.685466331647747 + "value": 5.121461937914168 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39076,7 +41966,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 80.9142327953996 + "value": 90.94965071863393 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -39095,21 +41985,38 @@ "time" ], "times": { - "compilation": 15578.485, - "data": 63118.272, - "framework": 1931121.685, - "kernel_overhead": 889656.174, - "profiling_overhead": 52829.147, - "profiling_runs": 925518.092, + "compilation": 16717.378, + "data": 63308.564, + "framework": 1883345.6460000002, + "kernel_overhead": 866639.302, + "profiling_overhead": 53577.464, + "profiling_runs": 899820.316, "runtimes": [ - 4847.808 + 4415.648 ], - "search_algorithm": 23.399, - "validation": 13.103 + "search_algorithm": 26.015, + "validation": 18.09 }, - "timestamp": "2026-01-27 09:26:47 UTC" + "timestamp": "2026-03-02 14:29:1 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -39126,61 +42033,61 @@ { "name": "time", "unit": "", - "value": 8062.815 + "value": 6785.248 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.662295303517746 + "value": 7.228294259313869 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2109808.0 + "value": 11472.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869212.0 + "value": 1870984.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.6419167084044486 + "value": 0.9370103991671824 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2225073.0 + "value": 122680.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102446.0 + "value": 2101479.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.923171201613733 + "value": 3.041119157464842 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.03223627595249105 + "value": 0.03563094655575491 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -39210,13 +42117,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.83085856847794 + "value": 98.85671656581624 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98235545612481 + "value": 99.97623552438426 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -39228,7 +42135,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -39240,7 +42147,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8467251200.0 + "value": 8738832384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -39252,13 +42159,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 1157627904.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -39270,13 +42177,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 971702272.0 + "value": 970653696.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.531915936216695 + "value": 36.4768872473672 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -39288,13 +42195,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.010514705158197 + "value": 24.329841374616166 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.563735297656503 + "value": 1.6334732368211535 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39306,7 +42213,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.67558084014644 + "value": 87.97622704770431 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -39325,21 +42232,38 @@ "time" ], "times": { - "compilation": 15154.891, - "data": 62996.806, - "framework": 3110732.614, - "kernel_overhead": 1472748.273, - "profiling_overhead": 52629.734, - "profiling_runs": 1522357.801, + "compilation": 52167.45, + "data": 62867.205, + "framework": 3118279.801, + "kernel_overhead": 1478474.159, + "profiling_overhead": 53071.528, + "profiling_runs": 1523866.909, "runtimes": [ - 8062.815 + 6785.248 ], - "search_algorithm": 36.095, - "validation": 14.294 + "search_algorithm": 30.774, + "validation": 18.848 }, - "timestamp": "2026-01-27 09:26:49 UTC" + "timestamp": "2026-03-02 14:29:3 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -39356,61 +42280,61 @@ { "name": "time", "unit": "", - "value": 7664.544 + "value": 6931.264 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.731781644731031 + "value": 7.224742528320665 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2107708.0 + "value": 18784.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872004.0 + "value": 1876748.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.6437561464188177 + "value": 0.9423400671137603 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2220904.0 + "value": 131203.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108929.0 + "value": 2111310.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.92311048743405 + "value": 3.0410468905774724 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.032235932608117544 + "value": 0.03563072442470502 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -39440,13 +42364,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.78274125831581 + "value": 98.81698101403725 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98292147150526 + "value": 99.97762332948358 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -39458,7 +42382,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -39470,7 +42394,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8467251200.0 + "value": 8738832384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -39482,13 +42406,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 1157627904.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -39500,13 +42424,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 971702272.0 + "value": 970653696.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.53152234761756 + "value": 36.476185316791046 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -39518,13 +42442,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.010155671182947 + "value": 24.3293519728617 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5637097901157808 + "value": 1.6334403790373457 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39536,7 +42460,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.6742949526775 + "value": 87.97447478231129 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -39555,21 +42479,38 @@ "time" ], "times": { - "compilation": 14502.069, - "data": 64505.05, - "framework": 3119183.376, - "kernel_overhead": 1475701.685, - "profiling_overhead": 54208.014, - "profiling_runs": 1524768.627, + "compilation": 15950.954, + "data": 64770.071, + "framework": 3128184.708, + "kernel_overhead": 1481349.587, + "profiling_overhead": 55066.712, + "profiling_runs": 1526998.338, "runtimes": [ - 7664.544 + 6931.264 ], - "search_algorithm": 23.878, - "validation": 13.762 + "search_algorithm": 25.4, + "validation": 16.999 }, - "timestamp": "2026-01-27 09:26:51 UTC" + "timestamp": "2026-03-02 14:29:5 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -39586,61 +42527,61 @@ { "name": "time", "unit": "", - "value": 7634.368 + "value": 6905.696 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.614816096172042 + "value": 7.056790028439909 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2102600.0 + "value": 10000.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838996.0 + "value": 1838468.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.6393580217228094 + "value": 0.9312986338546073 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2225503.0 + "value": 121871.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100524.0 + "value": 2101548.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.923515814105758 + "value": 3.041127018280761 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.032239502562946006 + "value": 0.035632302131764534 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -39670,13 +42611,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.84582073060159 + "value": 98.84921316233583 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98150231320005 + "value": 99.98004037950605 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -39688,7 +42629,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -39700,7 +42641,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8467251200.0 + "value": 8738832384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -39712,13 +42653,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 1157627904.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -39730,13 +42671,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 971702272.0 + "value": 970653696.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.53491031913527 + "value": 36.47685696535977 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -39748,13 +42689,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.012905627976437 + "value": 24.329841065894787 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5639051605813339 + "value": 1.6334732160940102 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39766,7 +42707,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.68423620312188 + "value": 87.97622665645966 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -39785,21 +42726,38 @@ "time" ], "times": { - "compilation": 14506.503, - "data": 60028.933, - "framework": 3106114.784, - "kernel_overhead": 1473637.759, - "profiling_overhead": 49935.384, - "profiling_runs": 1522512.708, + "compilation": 15735.258, + "data": 64898.087, + "framework": 3123900.325, + "kernel_overhead": 1479218.67, + "profiling_overhead": 55010.031, + "profiling_runs": 1524773.537, "runtimes": [ - 7634.368 + 6905.696 ], - "search_algorithm": 24.451, - "validation": 13.449 + "search_algorithm": 25.161, + "validation": 16.746 }, - "timestamp": "2026-01-27 09:26:52 UTC" + "timestamp": "2026-03-02 14:29:6 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -39816,61 +42774,61 @@ { "name": "time", "unit": "", - "value": 7635.968 + "value": 6891.904 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.639965956317976 + "value": 7.05380661603722 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2109940.0 + "value": 6516.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838972.0 + "value": 1841008.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.6427176601487028 + "value": 0.9372223724361698 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2233008.0 + "value": 122562.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101938.0 + "value": 2108991.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.9235324518332404 + "value": 3.041029516167503 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.03223920651672742 + "value": 0.035632153343992626 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -39900,13 +42858,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.81359659857272 + "value": 98.82300965108269 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98345348240363 + "value": 99.97952430698318 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -39918,7 +42876,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -39930,7 +42888,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8467251200.0 + "value": 8738832384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -39942,13 +42900,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 1157627904.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -39960,13 +42918,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 971702272.0 + "value": 970653696.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.534246092428667 + "value": 36.47702753000923 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -39978,13 +42936,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.01227391352905 + "value": 24.329865057979646 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5638602804777717 + "value": 1.6334748268907233 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39996,7 +42954,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.6819627523962 + "value": 87.97633008835561 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40015,21 +42973,38 @@ "time" ], "times": { - "compilation": 15969.029, - "data": 63997.246, - "framework": 3115864.824, - "kernel_overhead": 1474473.164, - "profiling_overhead": 53808.02, - "profiling_runs": 1523586.394, + "compilation": 16297.62, + "data": 62623.752, + "framework": 3122332.74, + "kernel_overhead": 1480593.667, + "profiling_overhead": 52806.692, + "profiling_runs": 1526308.629, "runtimes": [ - 7635.968 + 6891.904 ], - "search_algorithm": 39.189, - "validation": 19.344 + "search_algorithm": 27.91, + "validation": 20.141 }, - "timestamp": "2026-01-27 09:26:54 UTC" + "timestamp": "2026-03-02 14:29:8 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -40046,61 +43021,61 @@ { "name": "time", "unit": "", - "value": 7709.216 + "value": 6913.024 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.654210623972856 + "value": 7.036414948780798 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2114044.0 + "value": 6956.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842068.0 + "value": 1837652.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.657946001379558 + "value": 0.9283849954041656 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2239354.0 + "value": 117318.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2110240.0 + "value": 2100910.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.9267962703386474 + "value": 3.0388759222956625 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.03227501337175236 + "value": 0.035603729792029366 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -40130,13 +43105,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.65158840561398 + "value": 98.56273970341462 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97741817740416 + "value": 99.96075373672561 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -40148,7 +43123,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -40160,7 +43135,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8467251200.0 + "value": 8738832384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -40172,13 +43147,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 1157627904.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -40190,13 +43165,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 971702272.0 + "value": 970653696.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.56647047189112 + "value": 36.45457990223357 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -40208,13 +43183,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.03805238901035 + "value": 24.315022275682924 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5656917102543972 + "value": 1.6324783022003915 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -40226,7 +43201,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.77527892759852 + "value": 87.92265524617609 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40245,21 +43220,38 @@ "time" ], "times": { - "compilation": 14964.365, - "data": 63909.252, - "framework": 3117832.2010000004, - "kernel_overhead": 1475488.094, - "profiling_overhead": 53769.574, - "profiling_runs": 1524665.281, + "compilation": 14963.764, + "data": 61066.177, + "framework": 3115363.378, + "kernel_overhead": 1478756.242, + "profiling_overhead": 51340.994, + "profiling_runs": 1524199.965, "runtimes": [ - 7709.216 + 6913.024 ], - "search_algorithm": 39.457, - "validation": 20.363 + "search_algorithm": 37.827, + "validation": 19.253 }, - "timestamp": "2026-01-27 09:26:56 UTC" + "timestamp": "2026-03-02 14:29:9 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -40276,61 +43268,61 @@ { "name": "time", "unit": "", - "value": 9789.92 + "value": 9363.008 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.935522445645505 + "value": 5.3220848855615985 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2109256.0 + "value": 9792.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2226124.0 + "value": 2279648.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 80.1826891031837 + "value": 81.4611827008901 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 135660159.0 + "value": 133101144.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138424472.0 + "value": 138417978.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.220333204288555 + "value": 1.1175336782359646 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012562160356263194 + "value": 0.0130641842051674 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -40360,13 +43352,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.949708421784 + "value": 98.86691024830382 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.68000049735441 + "value": 101.3181415536679 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -40378,7 +43370,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -40390,7 +43382,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1883766784.0 + "value": 1918369792.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -40402,13 +43394,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1262485504.0 + "value": 1245708288.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -40420,13 +43412,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 259604480.0 + "value": 258555904.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.538320077230029 + "value": 7.67238810827138 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -40438,13 +43430,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.380964923631623 + "value": 17.60490855299957 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.115418265487774 + "value": 5.112558282176023 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -40456,7 +43448,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.80944427226605 + "value": 16.957266099828598 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40475,21 +43467,38 @@ "time" ], "times": { - "compilation": 24373.0, - "data": 61714.977, - "framework": 560610.72, - "kernel_overhead": 191217.135, - "profiling_overhead": 51391.878, - "profiling_runs": 256286.73, + "compilation": 67032.547, + "data": 66437.023, + "framework": 566409.247, + "kernel_overhead": 189763.668, + "profiling_overhead": 56350.57, + "profiling_runs": 253857.986, "runtimes": [ - 9789.92 + 9363.008 ], - "search_algorithm": 39.94, - "validation": 25.13 + "search_algorithm": 28.102, + "validation": 23.69 }, - "timestamp": "2026-01-27 09:26:56 UTC" + "timestamp": "2026-03-02 14:29:10 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -40506,61 +43515,61 @@ { "name": "time", "unit": "", - "value": 9677.28 + "value": 9490.112 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.264475278921504 + "value": 6.19711906912455 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2111336.0 + "value": 30032.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2230948.0 + "value": 2288456.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 80.13947559938268 + "value": 81.83341451784501 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 135549736.0 + "value": 133091404.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138421311.0 + "value": 138421294.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1989559719859115 + "value": 1.0975096503264532 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012470936415700642 + "value": 0.012772913971513908 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -40590,13 +43599,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.7640648596998 + "value": 98.70413151566078 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.19403507692411 + "value": 98.34960656763944 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -40608,7 +43617,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -40620,7 +43629,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1883766784.0 + "value": 1918369792.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -40632,13 +43641,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1262485504.0 + "value": 1245708288.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -40650,13 +43659,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 259604480.0 + "value": 258555904.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.52060479811597 + "value": 7.72772124193562 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -40668,13 +43677,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.340142069625575 + "value": 17.731931847752954 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.103403629134187 + "value": 5.149446516821811 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -40686,7 +43695,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.769978418321493 + "value": 17.07963513952265 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40705,21 +43714,38 @@ "time" ], "times": { - "compilation": 26657.644, - "data": 62524.088, - "framework": 566596.002, - "kernel_overhead": 194359.785, - "profiling_overhead": 51958.264, - "profiling_runs": 257753.865, + "compilation": 18275.921, + "data": 65035.623, + "framework": 560221.8, + "kernel_overhead": 189245.162, + "profiling_overhead": 55073.237, + "profiling_runs": 250867.778, "runtimes": [ - 9677.28 + 9490.112 ], - "search_algorithm": 39.16, - "validation": 23.568 + "search_algorithm": 27.549, + "validation": 18.525 }, - "timestamp": "2026-01-27 09:26:56 UTC" + "timestamp": "2026-03-02 14:29:10 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -40736,61 +43762,61 @@ { "name": "time", "unit": "", - "value": 9428.192 + "value": 9606.88 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.73558874966368 + "value": 6.103116125903225 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2108032.0 + "value": 22996.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2182136.0 + "value": 2199796.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.6809738385578 + "value": 81.76483500401989 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 135001939.0 + "value": 133028226.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138416137.0 + "value": 138422079.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.229001380197509 + "value": 1.1027481499887914 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012883497139744111 + "value": 0.012813657757462234 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -40820,13 +43846,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.82340932896282 + "value": 98.87424825361612 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.84469065033247 + "value": 97.67191979449011 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -40838,7 +43864,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -40850,7 +43876,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1883766784.0 + "value": 1918369792.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -40862,13 +43888,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1262485504.0 + "value": 1245708288.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -40880,13 +43906,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 259604480.0 + "value": 258555904.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.640892681097533 + "value": 7.805661107827541 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -40898,13 +43924,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.617629921254824 + "value": 17.91191787260781 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.18507150148259 + "value": 5.201715407584714 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -40916,7 +43942,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.03832718903714 + "value": 17.252980480585464 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40935,21 +43961,38 @@ "time" ], "times": { - "compilation": 29102.886, - "data": 61642.534, - "framework": 564268.074, - "kernel_overhead": 194156.797, - "profiling_overhead": 50910.952, - "profiling_runs": 257557.791, + "compilation": 18799.33, + "data": 64353.537, + "framework": 560843.809, + "kernel_overhead": 190037.718, + "profiling_overhead": 54610.828, + "profiling_runs": 251841.726, "runtimes": [ - 9428.192 + 9606.88 ], - "search_algorithm": 42.42, - "validation": 30.43 + "search_algorithm": 30.038, + "validation": 26.663 }, - "timestamp": "2026-01-27 09:26:57 UTC" + "timestamp": "2026-03-02 14:29:11 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -40966,61 +44009,61 @@ { "name": "time", "unit": "", - "value": 9443.232 + "value": 9614.176 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.734055176267333 + "value": 5.991099422655921 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2101132.0 + "value": 20016.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2177404.0 + "value": 2200828.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.82794390184355 + "value": 81.8205070250143 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 134965406.0 + "value": 132985805.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138419478.0 + "value": 138425471.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2454973877280564 + "value": 1.1122671887745297 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012875657362663008 + "value": 0.01284624245839947 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -41050,13 +44093,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.72367393049755 + "value": 98.7300263386235 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.99953546308944 + "value": 98.12346426349517 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -41068,7 +44111,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -41080,7 +44123,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1883766784.0 + "value": 1918369792.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -41092,13 +44135,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1262485504.0 + "value": 1245708288.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -41110,13 +44153,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 259604480.0 + "value": 258555904.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.701510341592612 + "value": 7.789651769092458 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -41128,13 +44171,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.75721886329138 + "value": 17.874830620977086 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.2261541356683985 + "value": 5.1909450741338485 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -41146,7 +44189,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.17334125124835 + "value": 17.217277158988388 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -41165,21 +44208,38 @@ "time" ], "times": { - "compilation": 29248.542, - "data": 60960.411, - "framework": 558851.112, - "kernel_overhead": 192134.326, - "profiling_overhead": 50750.839, - "profiling_runs": 255005.536, + "compilation": 19970.347, + "data": 64151.336, + "framework": 560612.9750000001, + "kernel_overhead": 189712.119, + "profiling_overhead": 54009.674, + "profiling_runs": 252739.846, "runtimes": [ - 9443.232 + 9614.176 ], - "search_algorithm": 38.015, - "validation": 25.489 + "search_algorithm": 41.885, + "validation": 24.625 }, - "timestamp": "2026-01-27 09:26:57 UTC" + "timestamp": "2026-03-02 14:29:11 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 128, + "registers": 32 + }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", @@ -41196,61 +44256,61 @@ { "name": "time", "unit": "", - "value": 9193.024 + "value": 9339.136 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.908575959719515 + "value": 5.835555241531504 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2117384.0 + "value": 11580.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2169368.0 + "value": 2195272.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 78.05108536324727 + "value": 79.16668081653239 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 114171912.0 + "value": 118937418.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425752.0 + "value": 138425960.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2844883431845513 + "value": 1.1374183450649098 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.013123128608539977 + "value": 0.013202189294296568 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -41280,13 +44340,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.51680723017081 + "value": 94.47917271430029 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.95755151415358 + "value": 98.35762785602634 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -41298,7 +44358,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -41310,7 +44370,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1883766784.0 + "value": 1918369792.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -41322,13 +44382,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1262485504.0 + "value": 1245708288.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 106954752.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -41340,13 +44400,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 259604480.0 + "value": 258555904.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.933366896392122 + "value": 7.9862019547807614 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -41358,13 +44418,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 18.291029787806693 + "value": 18.326376417764706 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.383260842090079 + "value": 5.322076354719512 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -41376,7 +44436,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.689601771545796 + "value": 17.652206133071427 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -41395,21 +44455,38 @@ "time" ], "times": { - "compilation": 28271.118, - "data": 62138.935, - "framework": 558314.538, - "kernel_overhead": 191179.425, - "profiling_overhead": 51750.962, - "profiling_runs": 253245.216, + "compilation": 20144.35, + "data": 63905.815, + "framework": 555627.475, + "kernel_overhead": 188262.938, + "profiling_overhead": 53734.288, + "profiling_runs": 249724.434, "runtimes": [ - 9193.024 + 9339.136 ], - "search_algorithm": 37.601, - "validation": 25.794 + "search_algorithm": 31.022, + "validation": 22.872 }, - "timestamp": "2026-01-27 09:26:57 UTC" + "timestamp": "2026-03-02 14:29:11 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -41426,61 +44503,61 @@ { "name": "time", "unit": "", - "value": 2966.624 + "value": 2741.984 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.66004672897196 + "value": 17.763345595376844 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104880.0 + "value": 1084.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870696.0 + "value": 1871724.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 4.177706327812932 + "value": 2.2278338722106152 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2140858.0 + "value": 50357.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104219.0 + "value": 2102907.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.740371648802647 + "value": 14.858438815052768 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.16744021009603968 + "value": 0.1740279685378733 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -41510,13 +44587,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.70489716966954 + "value": 98.76818115711123 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97244098236769 + "value": 99.93761382169838 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -41528,7 +44605,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -41540,7 +44617,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2772434944.0 + "value": 3042967552.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -41552,13 +44629,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1912602624.0 + "value": 1644167168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -41570,13 +44647,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 400162816.0 + "value": 399179776.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.699318539032156 + "value": 45.9424643250319 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -41588,13 +44665,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.16868017276385 + "value": 59.43862808275651 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.67645851929202 + "value": 7.749078954148432 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -41606,7 +44683,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.22343496315877 + "value": 88.38960912380051 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -41625,21 +44702,38 @@ "time" ], "times": { - "compilation": 26849.438, - "data": 60048.03, - "framework": 1195260.8769999999, - "kernel_overhead": 529278.249, - "profiling_overhead": 49879.533, - "profiling_runs": 556055.065, + "compilation": 78706.291, + "data": 61578.182, + "framework": 1158989.48, + "kernel_overhead": 510063.884, + "profiling_overhead": 51632.745, + "profiling_runs": 535714.669, "runtimes": [ - 2966.624 + 2741.984 ], - "search_algorithm": 38.907, - "validation": 19.725 + "search_algorithm": 28.087, + "validation": 18.382 }, - "timestamp": "2026-01-27 09:26:58 UTC" + "timestamp": "2026-03-02 14:29:12 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -41656,61 +44750,61 @@ { "name": "time", "unit": "", - "value": 2886.016 + "value": 2827.936 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.805177709521715 + "value": 17.471133190118152 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098012.0 + "value": 1684.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871012.0 + "value": 1872116.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 4.19440826715859 + "value": 2.205505617701552 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2134975.0 + "value": 51523.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102794.0 + "value": 2103037.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.739880005315436 + "value": 14.858666207925905 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.16742143468506704 + "value": 0.17405486212653737 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -41740,13 +44834,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.40915630274817 + "value": 98.66905412491526 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94720414685509 + "value": 99.95760351894512 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -41758,7 +44852,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -41770,7 +44864,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2772434944.0 + "value": 3042967552.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -41782,13 +44876,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1912602624.0 + "value": 1644167168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -41800,13 +44894,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 400162816.0 + "value": 399179776.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.70470998339882 + "value": 45.941284150174724 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -41818,13 +44912,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.176703300810686 + "value": 59.43592501321279 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.6775358436147165 + "value": 7.748726552015533 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -41836,7 +44930,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.23543345949689 + "value": 88.38563263682863 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -41855,21 +44949,38 @@ "time" ], "times": { - "compilation": 28868.826, - "data": 60256.597, - "framework": 1190889.8849999998, - "kernel_overhead": 527215.026, - "profiling_overhead": 49713.647, - "profiling_runs": 553704.615, + "compilation": 20721.005, + "data": 63780.955, + "framework": 1164913.355, + "kernel_overhead": 510609.198, + "profiling_overhead": 53831.034, + "profiling_runs": 536692.168, "runtimes": [ - 2886.016 + 2827.936 ], - "search_algorithm": 41.832, - "validation": 20.333 + "search_algorithm": 28.921, + "validation": 22.986 }, - "timestamp": "2026-01-27 09:26:59 UTC" + "timestamp": "2026-03-02 14:29:13 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -41886,61 +44997,61 @@ { "name": "time", "unit": "", - "value": 2898.88 + "value": 2844.96 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.260755009378286 + "value": 17.089884307327203 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2106328.0 + "value": 3652.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839988.0 + "value": 1839868.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 4.2016259725254805 + "value": 2.2109354565496284 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2156387.0 + "value": 54160.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104599.0 + "value": 2103858.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.742907744006292 + "value": 14.858681736165321 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.16747046949653782 + "value": 0.17405179273134047 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -41970,13 +45081,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.71818221306063 + "value": 98.7630406715831 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94875301247184 + "value": 99.95277156020902 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -41988,7 +45099,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -42000,7 +45111,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2772434944.0 + "value": 3042967552.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -42012,13 +45123,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1912602624.0 + "value": 1644167168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -42030,13 +45141,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 400162816.0 + "value": 399179776.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.71647093311248 + "value": 45.941960184606806 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -42048,13 +45159,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.1925630538068 + "value": 59.437750107653585 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.679665449119565 + "value": 7.748964491573979 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -42066,7 +45177,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.25903697901195 + "value": 88.38830683162813 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -42085,21 +45196,38 @@ "time" ], "times": { - "compilation": 20729.216, - "data": 60549.092, - "framework": 1193017.8939999999, - "kernel_overhead": 527932.318, - "profiling_overhead": 50252.532, - "profiling_runs": 554283.952, + "compilation": 18424.607, + "data": 65284.481, + "framework": 1167916.444, + "kernel_overhead": 510575.42, + "profiling_overhead": 55411.733, + "profiling_runs": 536644.81, "runtimes": [ - 2898.88 + 2844.96 ], - "search_algorithm": 28.737, - "validation": 15.859 + "search_algorithm": 27.312, + "validation": 17.709 }, - "timestamp": "2026-01-27 09:26:59 UTC" + "timestamp": "2026-03-02 14:29:13 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -42116,61 +45244,61 @@ { "name": "time", "unit": "", - "value": 2950.72 + "value": 2842.304 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 34.539990131431395 + "value": 17.27052401355835 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103204.0 + "value": 8360.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839196.0 + "value": 1843836.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 4.203632938864868 + "value": 2.2134882201137924 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2154156.0 + "value": 56879.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101096.0 + "value": 2106558.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.7436151826913 + "value": 14.858447368478153 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.16746220754929117 + "value": 0.1740560208821898 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -42200,13 +45328,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.54821621641521 + "value": 98.69006744209399 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9473403466991 + "value": 99.95549800589139 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -42218,7 +45346,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -42230,7 +45358,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2772434944.0 + "value": 3042967552.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -42242,13 +45370,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1912602624.0 + "value": 1644167168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -42260,13 +45388,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 400162816.0 + "value": 399179776.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.71483977423616 + "value": 45.94172142556084 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -42278,13 +45406,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.190549855432806 + "value": 59.437572699555176 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.6793951221894625 + "value": 7.748941362686149 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -42296,7 +45424,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.25607503232293 + "value": 88.38808286873345 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -42315,21 +45443,38 @@ "time" ], "times": { - "compilation": 20025.926, - "data": 61011.849, - "framework": 1190394.786, - "kernel_overhead": 526293.503, - "profiling_overhead": 50223.043, - "profiling_runs": 552866.391, + "compilation": 17862.991, + "data": 63531.883, + "framework": 1162913.62, + "kernel_overhead": 509749.555, + "profiling_overhead": 53770.908, + "profiling_runs": 535861.274, "runtimes": [ - 2950.72 + 2842.304 ], - "search_algorithm": 27.173, - "validation": 13.932 + "search_algorithm": 39.781, + "validation": 22.311 }, - "timestamp": "2026-01-27 09:27:0 UTC" + "timestamp": "2026-03-02 14:29:14 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 32 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 31 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -42346,61 +45491,61 @@ { "name": "time", "unit": "", - "value": 2946.08 + "value": 2942.912 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.3441273720529 + "value": 17.022522389818526 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097376.0 + "value": 8640.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836284.0 + "value": 1840360.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 4.183121075111511 + "value": 2.203660020617124 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2144514.0 + "value": 56306.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098983.0 + "value": 2105523.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.76507042992011 + "value": 14.865823032102757 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 17301504.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1677252434476355 + "value": 0.17413233547342652 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -42430,13 +45575,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.13349687939628 + "value": 98.55847586238731 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.948972566863 + "value": 99.95189125706314 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -42448,7 +45593,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4982833152.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -42460,7 +45605,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2772434944.0 + "value": 3042967552.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -42472,13 +45617,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 570425344.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1912602624.0 + "value": 1644167168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -42490,13 +45635,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 400162816.0 + "value": 399179776.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.77931219896015 + "value": 45.96352502240998 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -42508,13 +45653,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.2794446604516 + "value": 59.46577874689567 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.691331680480563 + "value": 7.752618615928293 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -42526,7 +45671,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.38855498145833 + "value": 88.43002469663749 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -42545,21 +45690,38 @@ "time" ], "times": { - "compilation": 18314.67, - "data": 61898.993, - "framework": 1192685.182, - "kernel_overhead": 526408.847, - "profiling_overhead": 51698.838, - "profiling_runs": 552678.504, + "compilation": 18294.88, + "data": 63470.487, + "framework": 1163535.364, + "kernel_overhead": 509991.199, + "profiling_overhead": 53797.205, + "profiling_runs": 536276.473, "runtimes": [ - 2946.08 + 2942.912 ], - "search_algorithm": 30.96, - "validation": 21.332 + "search_algorithm": 31.649, + "validation": 20.125 }, - "timestamp": "2026-01-27 09:27:1 UTC" + "timestamp": "2026-03-02 14:29:15 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -42576,61 +45738,61 @@ { "name": "time", "unit": "", - "value": 3838.592 + "value": 3415.52 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.339613493641053 + "value": 14.30445740384802 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2107540.0 + "value": 12492.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871288.0 + "value": 1873664.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.241809909109633 + "value": 1.8115898631218814 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2160315.0 + "value": 71450.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105221.0 + "value": 2105994.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.920341785126696 + "value": 6.03729488107607 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0652788986225823 + "value": 0.07072896594631678 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -42660,13 +45822,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.63103645215946 + "value": 98.70568532851564 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95324780051602 + "value": 99.96625239598114 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -42678,7 +45840,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -42690,7 +45852,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2963275776.0 + "value": 3301965824.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -42702,13 +45864,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1764753408.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -42720,13 +45882,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 514490368.0 + "value": 513540096.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.11179792696698 + "value": 42.22169988244044 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -42738,13 +45900,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.584572394574515 + "value": 48.30060771718336 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.1675074626028277 + "value": 3.2428386528870665 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -42756,7 +45918,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.45255218161616 + "value": 92.40394212827809 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -42775,21 +45937,38 @@ "time" ], "times": { - "compilation": 16263.629, - "data": 59758.113, - "framework": 1963097.582, - "kernel_overhead": 911511.945, - "profiling_overhead": 49702.323, - "profiling_runs": 942125.201, + "compilation": 55945.424, + "data": 61814.739, + "framework": 1835174.9900000002, + "kernel_overhead": 846263.633, + "profiling_overhead": 51917.404, + "profiling_runs": 875179.214, "runtimes": [ - 3838.592 + 3415.52 ], - "search_algorithm": 24.409, - "validation": 16.34 + "search_algorithm": 27.814, + "validation": 18.762 }, - "timestamp": "2026-01-27 09:27:2 UTC" + "timestamp": "2026-03-02 14:29:16 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -42806,61 +45985,61 @@ { "name": "time", "unit": "", - "value": 3798.944 + "value": 3554.432 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.464091734706475 + "value": 14.076518557214884 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2107936.0 + "value": 8656.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1879024.0 + "value": 1871084.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.275547742467619 + "value": 1.794527600812627 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2158330.0 + "value": 64088.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104737.0 + "value": 2099278.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.920933497043442 + "value": 6.037124342642198 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06527046582067546 + "value": 0.07072559929412536 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -42890,13 +46069,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.58737933958763 + "value": 98.62715405681988 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94379863912566 + "value": 99.96403789183842 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -42908,7 +46087,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -42920,7 +46099,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2963275776.0 + "value": 3301965824.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -42932,13 +46111,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1764753408.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -42950,13 +46129,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 514490368.0 + "value": 513540096.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.110903762826716 + "value": 42.220777670583956 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -42968,13 +46147,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.58302760181231 + "value": 48.29937859289387 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.167397712921724 + "value": 3.2427561311147004 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -42986,7 +46165,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.4496192730343 + "value": 92.4016237963141 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -43005,21 +46184,38 @@ "time" ], "times": { - "compilation": 16113.744, - "data": 61520.031, - "framework": 1960876.639, - "kernel_overhead": 908802.276, - "profiling_overhead": 51127.168, - "profiling_runs": 939427.164, + "compilation": 16784.544, + "data": 65951.887, + "framework": 1844479.3020000001, + "kernel_overhead": 846476.635, + "profiling_overhead": 56227.625, + "profiling_runs": 875823.155, "runtimes": [ - 3798.944 + 3554.432 ], - "search_algorithm": 34.301, - "validation": 17.734 + "search_algorithm": 25.443, + "validation": 17.297 }, - "timestamp": "2026-01-27 09:27:3 UTC" + "timestamp": "2026-03-02 14:29:16 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -43036,61 +46232,61 @@ { "name": "time", "unit": "", - "value": 3888.576 + "value": 3575.2 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.160272538531917 + "value": 13.79049516861903 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2109376.0 + "value": 2196.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842448.0 + "value": 1839208.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.247741266103399 + "value": 1.790323728902174 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2173287.0 + "value": 59167.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103834.0 + "value": 2099244.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.921824139251356 + "value": 6.037349238153336 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06529470906153788 + "value": 0.07072879495988264 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -43120,13 +46316,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.701558355762 + "value": 98.69873117570742 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96346883110196 + "value": 99.96394722997593 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -43138,7 +46334,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -43150,7 +46346,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2963275776.0 + "value": 3301965824.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -43162,13 +46358,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1764753408.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -43180,13 +46376,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 514490368.0 + "value": 513540096.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.116625809974 + "value": 42.22295182879703 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -43198,13 +46394,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.59081093046387 + "value": 48.30160475909495 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.167950678897701 + "value": 3.2429055929568142 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -43216,7 +46412,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.46450588359913 + "value": 92.40584957018105 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -43235,21 +46431,38 @@ "time" ], "times": { - "compilation": 14921.683, - "data": 61362.958, - "framework": 1956074.3960000002, - "kernel_overhead": 906600.312, - "profiling_overhead": 50826.795, - "profiling_runs": 937284.331, + "compilation": 16415.616, + "data": 62488.649, + "framework": 1836528.683, + "kernel_overhead": 846164.012, + "profiling_overhead": 52537.018, + "profiling_runs": 875339.004, "runtimes": [ - 3888.576 + 3575.2 ], - "search_algorithm": 22.67, - "validation": 14.919 + "search_algorithm": 25.555, + "validation": 19.025 }, - "timestamp": "2026-01-27 09:27:4 UTC" + "timestamp": "2026-03-02 14:29:17 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -43266,61 +46479,61 @@ { "name": "time", "unit": "", - "value": 4065.76 + "value": 3511.232 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.34573177762404 + "value": 13.619644538606405 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2105048.0 + "value": 10296.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841348.0 + "value": 1841104.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.2851480992681577 + "value": 1.8034813613845255 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2170344.0 + "value": 68726.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105011.0 + "value": 2104955.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.921794866539238 + "value": 6.037435403295708 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06529511500293424 + "value": 0.07073021172957644 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -43350,13 +46563,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.62796612792587 + "value": 98.62663703605267 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9679176905339 + "value": 99.96521031819559 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -43368,7 +46581,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -43380,7 +46593,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2963275776.0 + "value": 3301965824.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -43392,13 +46605,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1764753408.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -43410,13 +46623,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 514490368.0 + "value": 513540096.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.1153806607518 + "value": 42.22322252217325 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -43428,13 +46641,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.589103722914324 + "value": 48.301961972933185 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.16782939047072 + "value": 3.2429295758194887 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -43446,7 +46659,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.4612650104988 + "value": 92.40656606508587 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -43465,21 +46678,38 @@ "time" ], "times": { - "compilation": 14331.455, - "data": 64541.018, - "framework": 1964715.099, - "kernel_overhead": 907647.137, - "profiling_overhead": 54267.519, - "profiling_runs": 938259.425, + "compilation": 15957.24, + "data": 61037.96, + "framework": 1836748.4849999999, + "kernel_overhead": 847637.381, + "profiling_overhead": 51167.062, + "profiling_runs": 876906.082, "runtimes": [ - 4065.76 + 3511.232 ], - "search_algorithm": 24.14, - "validation": 17.987 + "search_algorithm": 26.16, + "validation": 20.673 }, - "timestamp": "2026-01-27 09:27:5 UTC" + "timestamp": "2026-03-02 14:29:18 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -43496,61 +46726,61 @@ { "name": "time", "unit": "", - "value": 3784.672 + "value": 3511.744 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.450228255431675 + "value": 13.875535470734002 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097368.0 + "value": 6140.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1835648.0 + "value": 1842996.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.2591998329029312 + "value": 1.7954308372664665 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2157472.0 + "value": 63498.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098982.0 + "value": 2101561.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.907904367166177 + "value": 6.017775570686679 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06514314686991102 + "value": 0.07044975991297436 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -43580,13 +46810,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.35872783046771 + "value": 97.88698364238269 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95623377054027 + "value": 99.87989539259831 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -43598,7 +46828,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -43610,7 +46840,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2963275776.0 + "value": 3301965824.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -43622,13 +46852,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1764753408.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -43640,13 +46870,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 514490368.0 + "value": 513540096.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.04199130385748 + "value": 42.09159777743576 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -43658,13 +46888,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.49052675588708 + "value": 48.15153497930194 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.1608259975495954 + "value": 3.2328301072529384 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -43676,7 +46906,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.27231650751766 + "value": 92.11881657315995 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -43695,21 +46925,38 @@ "time" ], "times": { - "compilation": 14841.792, - "data": 65182.277, - "framework": 1965264.793, - "kernel_overhead": 907449.779, - "profiling_overhead": 54901.21, - "profiling_runs": 937731.527, + "compilation": 16523.436, + "data": 66063.532, + "framework": 1841855.179, + "kernel_overhead": 845095.378, + "profiling_overhead": 56308.443, + "profiling_runs": 874387.826, "runtimes": [ - 3784.672 + 3511.744 ], - "search_algorithm": 24.233, - "validation": 15.707 + "search_algorithm": 25.734, + "validation": 19.267 }, - "timestamp": "2026-01-27 09:27:6 UTC" + "timestamp": "2026-03-02 14:29:19 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 56 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -43726,61 +46973,61 @@ { "name": "time", "unit": "", - "value": 6370.112 + "value": 5500.928 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.449185111144104 + "value": 8.917535216385462 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104108.0 + "value": 14148.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1866520.0 + "value": 1870628.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9529420203883165 + "value": 1.1412000921393666 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2196502.0 + "value": 109163.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100436.0 + "value": 2102920.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.8561652782151044 + "value": 1.8628315264466573 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019333251414554282 + "value": 0.02182549444389482 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -43810,13 +47057,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.8187109177256 + "value": 73.90350555293487 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98806648055056 + "value": 99.97289330604909 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -43828,7 +47075,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -43840,7 +47087,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4400349184.0 + "value": 4806148096.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -43852,13 +47099,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1626341376.0 + "value": 1222115328.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -43870,13 +47117,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 869564416.0 + "value": 868564992.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.09031275275254 + "value": 42.34384605715055 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -43888,13 +47135,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.3994829854414 + "value": 29.807154814962228 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0408975835324183 + "value": 1.0588234925725106 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -43906,7 +47153,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.51839862242483 + "value": 96.4462831899665 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -43925,21 +47172,38 @@ "time" ], "times": { - "compilation": 14712.604, - "data": 62591.227, - "framework": 3144707.04, - "kernel_overhead": 1493517.207, - "profiling_overhead": 52276.215, - "profiling_runs": 1536322.391, + "compilation": 81890.705, + "data": 62898.288, + "framework": 3304737.0360000003, + "kernel_overhead": 1574772.148, + "profiling_overhead": 53128.46, + "profiling_runs": 1613938.14, "runtimes": [ - 6370.112 + 5500.928 ], - "search_algorithm": 42.298, - "validation": 19.184 + "search_algorithm": 25.371, + "validation": 14.5 }, - "timestamp": "2026-01-27 09:27:8 UTC" + "timestamp": "2026-03-02 14:29:21 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 56 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -43956,61 +47220,61 @@ { "name": "time", "unit": "", - "value": 6331.552 + "value": 5645.472 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.43884281745106 + "value": 8.82223856874319 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098996.0 + "value": 16108.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869388.0 + "value": 1870208.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9507855209856535 + "value": 1.1364284268810139 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2191896.0 + "value": 108091.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100560.0 + "value": 2106242.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.856128770695742 + "value": 1.862827447639034 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01933204755079571 + "value": 0.021826386865460148 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -44040,13 +47304,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.78224769913786 + "value": 73.86837724439764 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98102006458238 + "value": 99.97861722805314 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -44058,7 +47322,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -44070,7 +47334,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4400349184.0 + "value": 4806148096.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -44082,13 +47346,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1626341376.0 + "value": 1222115328.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -44100,13 +47364,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 869564416.0 + "value": 868564992.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.09055259183708 + "value": 42.343078488500936 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -44118,13 +47382,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.399699568614977 + "value": 29.806667025276568 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0409061231277634 + "value": 1.0588061650824563 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -44136,7 +47400,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.51910022124717 + "value": 96.4447048607884 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -44155,21 +47419,38 @@ "time" ], "times": { - "compilation": 14492.703, - "data": 63589.396, - "framework": 3153478.743, - "kernel_overhead": 1496769.331, - "profiling_overhead": 53709.176, - "profiling_runs": 1539410.84, + "compilation": 18053.297, + "data": 65717.839, + "framework": 3315162.654, + "kernel_overhead": 1577164.395, + "profiling_overhead": 55693.982, + "profiling_runs": 1616586.438, "runtimes": [ - 6331.552 + 5645.472 ], - "search_algorithm": 26.842, - "validation": 16.854 + "search_algorithm": 26.769, + "validation": 19.347 }, - "timestamp": "2026-01-27 09:27:9 UTC" + "timestamp": "2026-03-02 14:29:23 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 56 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -44186,61 +47467,61 @@ { "name": "time", "unit": "", - "value": 6372.064 + "value": 5839.2 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.327073455945524 + "value": 8.630559126729043 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2102600.0 + "value": 16260.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838092.0 + "value": 1839788.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9571882812544885 + "value": 1.1386888891702602 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2205364.0 + "value": 109338.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100396.0 + "value": 2106083.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.856600755693582 + "value": 1.8628765599866357 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019335631600577715 + "value": 0.021826663678289473 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -44270,13 +47551,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.83099396329749 + "value": 73.90123455253439 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9757556191111 + "value": 99.97697302968265 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -44288,7 +47569,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -44300,7 +47581,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4400349184.0 + "value": 4806148096.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -44312,13 +47593,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1626341376.0 + "value": 1222115328.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -44330,13 +47611,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 869564416.0 + "value": 868564992.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.097585146907683 + "value": 42.34436950801246 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -44348,13 +47629,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.40598431273635 + "value": 29.80753524771838 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0411539224870412 + "value": 1.0588370064802306 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -44366,7 +47647,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.53945900074706 + "value": 96.44751414694855 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -44385,21 +47666,38 @@ "time" ], "times": { - "compilation": 14267.295, - "data": 62580.347, - "framework": 3144320.111, - "kernel_overhead": 1493150.313, - "profiling_overhead": 52480.345, - "profiling_runs": 1536109.106, + "compilation": 16470.423, + "data": 61850.789, + "framework": 3309157.5810000002, + "kernel_overhead": 1577748.124, + "profiling_overhead": 51964.899, + "profiling_runs": 1617593.769, "runtimes": [ - 6372.064 + 5839.2 ], - "search_algorithm": 22.745, - "validation": 14.575 + "search_algorithm": 27.492, + "validation": 16.258 }, - "timestamp": "2026-01-27 09:27:11 UTC" + "timestamp": "2026-03-02 14:29:25 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 56 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -44416,61 +47714,61 @@ { "name": "time", "unit": "", - "value": 6562.272 + "value": 5678.336 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.12008079228564 + "value": 8.591648465768426 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2115160.0 + "value": 11272.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1843416.0 + "value": 1836628.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.948877044205339 + "value": 1.1321781396723627 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2221016.0 + "value": 101817.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2110732.0 + "value": 2101128.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.856599930220589 + "value": 1.862827853036964 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01933561760492877 + "value": 0.02182551692799029 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -44500,13 +47798,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.82010719685273 + "value": 73.86891850700779 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98068778875184 + "value": 99.97463247680257 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -44518,7 +47816,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -44530,7 +47828,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4400349184.0 + "value": 4806148096.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -44542,13 +47840,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1626341376.0 + "value": 1222115328.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -44560,13 +47858,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 869564416.0 + "value": 868564992.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.09599390676621 + "value": 42.34320769028313 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -44578,13 +47876,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.40466256080919 + "value": 29.806666992179693 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.041101807512374 + "value": 1.0588061639067736 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -44596,7 +47894,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.53517732188807 + "value": 96.44470475369766 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -44615,21 +47913,38 @@ "time" ], "times": { - "compilation": 14239.689, - "data": 63828.637, - "framework": 3151802.0990000004, - "kernel_overhead": 1495569.247, - "profiling_overhead": 53765.617, - "profiling_runs": 1538638.598, + "compilation": 15530.091, + "data": 63575.907, + "framework": 3313732.618, + "kernel_overhead": 1578314.781, + "profiling_overhead": 53879.474, + "profiling_runs": 1617962.456, "runtimes": [ - 6562.272 + 5678.336 ], - "search_algorithm": 24.41, - "validation": 17.135 + "search_algorithm": 27.303, + "validation": 17.739 }, - "timestamp": "2026-01-27 09:27:12 UTC" + "timestamp": "2026-03-02 14:29:26 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 56 + }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", @@ -44646,61 +47961,61 @@ { "name": "time", "unit": "", - "value": 6332.384 + "value": 5639.968 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.298597930164252 + "value": 8.692012854116548 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097596.0 + "value": 16924.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837308.0 + "value": 1842260.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9537876565451893 + "value": 1.1372204853789638 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2200258.0 + "value": 110023.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099175.0 + "value": 2106770.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.8560269515111565 + "value": 1.8625157323214108 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019330132870752515 + "value": 0.02181796410341218 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -44730,13 +48045,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.67779864241542 + "value": 65.55780491403087 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97334093801861 + "value": 99.94145854759049 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -44748,7 +48063,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -44760,7 +48075,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4400349184.0 + "value": 4806148096.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -44772,13 +48087,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1626341376.0 + "value": 1222115328.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -44790,13 +48105,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 869564416.0 + "value": 868564992.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.089959678056093 + "value": 42.34262199029218 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -44808,13 +48123,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.399112501965206 + "value": 29.806242663222516 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0408829758465283 + "value": 1.0587910906979678 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -44826,7 +48141,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.51716307549803 + "value": 96.44329645311322 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -44845,21 +48160,38 @@ "time" ], "times": { - "compilation": 14260.869, - "data": 60529.955, - "framework": 3138992.471, - "kernel_overhead": 1492675.191, - "profiling_overhead": 50471.71, - "profiling_runs": 1535315.615, + "compilation": 16080.574, + "data": 60861.705, + "framework": 3303154.818, + "kernel_overhead": 1575884.462, + "profiling_overhead": 51136.997, + "profiling_runs": 1615271.654, "runtimes": [ - 6332.384 + 5639.968 ], - "search_algorithm": 24.349, - "validation": 14.094 + "search_algorithm": 24.76, + "validation": 17.113 }, - "timestamp": "2026-01-27 09:27:14 UTC" + "timestamp": "2026-03-02 14:29:28 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -44876,61 +48208,61 @@ { "name": "time", "unit": "", - "value": 3533.888 + "value": 3303.008 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.862638618817506 + "value": 14.734742007122565 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2101876.0 + "value": 11784.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868928.0 + "value": 1873584.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.539076620707305 + "value": 1.8635238509652818 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2146857.0 + "value": 71843.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101046.0 + "value": 2101397.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.404053576823852 + "value": 6.226980936471538 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07060906756497091 + "value": 0.07294578628558666 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -44960,13 +48292,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.64574348147805 + "value": 98.73430351740676 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9598789098737 + "value": 99.95998125480881 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -44978,7 +48310,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -44990,7 +48322,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2091909120.0 + "value": 2563768320.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -45002,13 +48334,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1830813696.0 + "value": 1362100224.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -45020,13 +48352,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 485130240.0 + "value": 484179968.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.572416707598734 + "value": 42.76934537910117 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -45038,13 +48370,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.22180390442517 + "value": 49.81759314662223 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.4259142910614564 + "value": 3.344687039873319 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -45056,7 +48388,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.14956039963747 + "value": 89.85728925772315 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -45075,21 +48407,38 @@ "time" ], "times": { - "compilation": 14132.846, - "data": 59400.474, - "framework": 1729719.79, - "kernel_overhead": 795972.029, - "profiling_overhead": 49342.741, - "profiling_runs": 825004.546, + "compilation": 65293.297, + "data": 63496.608, + "framework": 1712998.582, + "kernel_overhead": 783656.865, + "profiling_overhead": 53741.778, + "profiling_runs": 812103.331, "runtimes": [ - 3533.888 + 3303.008 ], - "search_algorithm": 25.714, - "validation": 16.771 + "search_algorithm": 27.969, + "validation": 15.797 }, - "timestamp": "2026-01-27 09:27:15 UTC" + "timestamp": "2026-03-02 14:29:29 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -45106,61 +48455,61 @@ { "name": "time", "unit": "", - "value": 3620.704 + "value": 3407.68 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.173632111797744 + "value": 14.516516001352505 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2105684.0 + "value": 220.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870444.0 + "value": 1868188.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.50687303374669 + "value": 1.8492495276314964 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2153353.0 + "value": 54216.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100801.0 + "value": 2098982.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.403246636950552 + "value": 6.226192736427134 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0706025358766947 + "value": 0.07294506746083075 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -45190,13 +48539,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.5720815345859 + "value": 98.65698348810352 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.953246576139 + "value": 99.96305330552224 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -45208,7 +48557,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -45220,7 +48569,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2091909120.0 + "value": 2563768320.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -45232,13 +48581,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1830813696.0 + "value": 1362100224.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -45250,13 +48599,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 485130240.0 + "value": 484179968.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.57138572373798 + "value": 42.76742415948237 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -45268,13 +48617,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.220542579817334 + "value": 49.81557126017632 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.4258246803532337 + "value": 3.3445512931026578 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -45286,7 +48635,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.14730707907763 + "value": 89.85367647477464 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -45305,21 +48654,38 @@ "time" ], "times": { - "compilation": 14166.068, - "data": 65583.281, - "framework": 1738984.213, - "kernel_overhead": 794347.611, - "profiling_overhead": 55366.733, - "profiling_runs": 823686.588, + "compilation": 16715.234, + "data": 66066.615, + "framework": 1723266.625, + "kernel_overhead": 786346.833, + "profiling_overhead": 56019.978, + "profiling_runs": 814833.199, "runtimes": [ - 3620.704 + 3407.68 ], - "search_algorithm": 24.735, - "validation": 16.222 + "search_algorithm": 26.945, + "validation": 18.512 }, - "timestamp": "2026-01-27 09:27:16 UTC" + "timestamp": "2026-03-02 14:29:30 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -45336,61 +48702,61 @@ { "name": "time", "unit": "", - "value": 3491.936 + "value": 3406.08 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.6207020643571 + "value": 14.280191531255593 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2099228.0 + "value": 404.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838496.0 + "value": 1837660.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.552254020274117 + "value": 1.8509482269161568 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2157060.0 + "value": 56337.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100795.0 + "value": 2099167.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.406146015934075 + "value": 6.22672486742929 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07062818273708808 + "value": 0.07294146056014625 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -45420,13 +48786,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.7006498248347 + "value": 98.71937435301028 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95836429686736 + "value": 99.95405450017589 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -45438,7 +48804,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -45450,7 +48816,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2091909120.0 + "value": 2563768320.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -45462,13 +48828,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1830813696.0 + "value": 1362100224.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -45480,13 +48846,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 485130240.0 + "value": 484179968.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.583059621888495 + "value": 42.76929635896512 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -45498,13 +48864,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.23558930862095 + "value": 49.81759268435235 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.4268936740255604 + "value": 3.3446870088371328 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -45516,7 +48882,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.17447373392126 + "value": 89.85729065093486 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -45535,21 +48901,38 @@ "time" ], "times": { - "compilation": 14245.257, - "data": 62099.242, - "framework": 1736895.947, - "kernel_overhead": 796987.07, - "profiling_overhead": 51776.163, - "profiling_runs": 826033.472, + "compilation": 17545.1, + "data": 64000.657, + "framework": 1715930.788, + "kernel_overhead": 784525.639, + "profiling_overhead": 54279.984, + "profiling_runs": 813124.508, "runtimes": [ - 3491.936 + 3406.08 ], - "search_algorithm": 35.401, - "validation": 16.643 + "search_algorithm": 27.785, + "validation": 16.29 }, - "timestamp": "2026-01-27 09:27:17 UTC" + "timestamp": "2026-03-02 14:29:31 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -45566,61 +48949,61 @@ { "name": "time", "unit": "", - "value": 3511.808 + "value": 3386.976 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.588595410442014 + "value": 14.384454159830327 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097336.0 + "value": 1124.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837256.0 + "value": 1839276.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5659581226574644 + "value": 1.854213525729012 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2153183.0 + "value": 58944.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098982.0 + "value": 2103061.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.4058082535730945 + "value": 6.226824935224893 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07062106711260009 + "value": 0.07295057159456247 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -45650,13 +49033,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.633744210871 + "value": 98.67379208432195 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94970249697312 + "value": 99.97185691856738 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -45668,7 +49051,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -45680,7 +49063,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2091909120.0 + "value": 2563768320.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -45692,13 +49075,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1830813696.0 + "value": 1362100224.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -45710,13 +49093,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 485130240.0 + "value": 484179968.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.58218856575422 + "value": 42.76728674590101 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -45728,13 +49111,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.23490943723587 + "value": 49.81494300186261 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.4268453726161225 + "value": 3.344509112673881 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -45746,7 +49129,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.17327180009133 + "value": 89.8525432684732 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -45765,21 +49148,38 @@ "time" ], "times": { - "compilation": 14155.711, - "data": 61969.538, - "framework": 1734272.6430000002, - "kernel_overhead": 795925.438, - "profiling_overhead": 51523.251, - "profiling_runs": 824854.416, + "compilation": 16873.007, + "data": 62739.409, + "framework": 1721058.886, + "kernel_overhead": 788336.592, + "profiling_overhead": 52925.188, + "profiling_runs": 817057.697, "runtimes": [ - 3511.808 + 3386.976 ], - "search_algorithm": 25.246, - "validation": 14.978 + "search_algorithm": 25.535, + "validation": 19.704 }, - "timestamp": "2026-01-27 09:27:18 UTC" + "timestamp": "2026-03-02 14:29:32 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 16 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 38 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -45796,61 +49196,61 @@ { "name": "time", "unit": "", - "value": 3504.032 + "value": 3360.8 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.687016973549476 + "value": 14.338697775312506 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2097400.0 + "value": 11736.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836068.0 + "value": 1841200.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5457878391203854 + "value": 1.8576682001786744 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2153032.0 + "value": 68297.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099161.0 + "value": 2105750.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.426831968088507 + "value": 6.252194500587304 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8912896.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.070857541167205 + "value": 0.07322342906605447 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -45880,13 +49280,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.45264236598801 + "value": 98.19986492559298 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95114272201135 + "value": 99.92388034089177 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -45898,7 +49298,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4647288832.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -45910,7 +49310,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2091909120.0 + "value": 2563768320.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -45922,13 +49322,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 301989888.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1830813696.0 + "value": 1362100224.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -45940,13 +49340,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 485130240.0 + "value": 484179968.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.703666069691806 + "value": 42.94796885258666 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -45958,13 +49358,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.39572627133095 + "value": 50.025273309938 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.4382705920305927 + "value": 3.3586304102131224 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -45976,7 +49376,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.4639028909865 + "value": 90.23195558042815 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -45995,21 +49395,38 @@ "time" ], "times": { - "compilation": 14244.202, - "data": 61499.368, - "framework": 1733942.133, - "kernel_overhead": 796128.699, - "profiling_overhead": 51319.635, - "profiling_runs": 824994.431, + "compilation": 16737.263, + "data": 65351.0, + "framework": 1720440.997, + "kernel_overhead": 785432.471, + "profiling_overhead": 55603.067, + "profiling_runs": 814054.459, "runtimes": [ - 3504.032 + 3360.8 ], - "search_algorithm": 24.241, - "validation": 15.581 + "search_algorithm": 25.11, + "validation": 17.083 }, - "timestamp": "2026-01-27 09:27:19 UTC" + "timestamp": "2026-03-02 14:29:33 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -46026,61 +49443,61 @@ { "name": "time", "unit": "", - "value": 6954.176 + "value": 6287.136 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.185445765774567 + "value": 7.928222619343274 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2110776.0 + "value": 10304.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869220.0 + "value": 1869352.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8193543674823522 + "value": 1.018673155172287 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2213950.0 + "value": 111886.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101990.0 + "value": 2102951.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7297758285761606 + "value": 1.6606701247938778 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.018016545487510742 + "value": 0.019456948461380998 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -46110,13 +49527,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.93541509668026 + "value": 82.02892972522397 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97678461888452 + "value": 99.9762894171048 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -46128,7 +49545,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -46140,7 +49557,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6516375552.0 + "value": 7219970048.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -46152,13 +49569,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1255669760.0 + "value": 551026688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -46170,13 +49587,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 953466880.0 + "value": 952385536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 34.9016389573897 + "value": 43.24349202071664 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -46188,13 +49605,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.604302088115226 + "value": 26.57152056168148 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.970115914851223 + "value": 0.9438858012023086 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -46206,7 +49623,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.39346363771948 + "value": 94.27376038338296 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -46225,21 +49642,38 @@ "time" ], "times": { - "compilation": 15046.908, - "data": 61896.136, - "framework": 3636877.205, - "kernel_overhead": 1738845.687, - "profiling_overhead": 51624.341, - "profiling_runs": 1784511.041, + "compilation": 84302.687, + "data": 62413.083, + "framework": 3663554.362, + "kernel_overhead": 1752994.527, + "profiling_overhead": 52490.516, + "profiling_runs": 1795656.236, "runtimes": [ - 6954.176 + 6287.136 ], - "search_algorithm": 24.83, - "validation": 17.298 + "search_algorithm": 25.718, + "validation": 15.775 }, - "timestamp": "2026-01-27 09:27:21 UTC" + "timestamp": "2026-03-02 14:29:35 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -46256,61 +49690,61 @@ { "name": "time", "unit": "", - "value": 6867.616 + "value": 6329.44 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.255985666145733 + "value": 7.835863956069759 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2120396.0 + "value": 9752.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1878088.0 + "value": 1869732.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8275188455635245 + "value": 1.0129348129641307 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2229081.0 + "value": 110865.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106476.0 + "value": 2102825.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7299936233645345 + "value": 1.6606315360307207 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0180165269965517 + "value": 0.01945654488053938 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -46340,13 +49774,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.91095176667726 + "value": 81.99355804313643 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9618443629237 + "value": 99.97774028286662 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -46358,7 +49792,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -46370,7 +49804,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6516375552.0 + "value": 7219970048.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -46382,13 +49816,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1255669760.0 + "value": 551026688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -46400,13 +49834,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 953466880.0 + "value": 952385536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 34.906755162318284 + "value": 43.24199642488347 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -46418,13 +49852,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.607954180981118 + "value": 26.57058381369405 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9702599121651491 + "value": 0.9438525256085166 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -46436,7 +49870,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.40643572056638 + "value": 94.27043687183976 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -46455,21 +49889,38 @@ "time" ], "times": { - "compilation": 14643.236, - "data": 60045.821, - "framework": 3633731.1059999997, - "kernel_overhead": 1738969.548, - "profiling_overhead": 50092.537, - "profiling_runs": 1784623.2, + "compilation": 15576.604, + "data": 60773.065, + "framework": 3668831.347, + "kernel_overhead": 1757044.977, + "profiling_overhead": 51062.306, + "profiling_runs": 1799950.999, "runtimes": [ - 6867.616 + 6329.44 ], - "search_algorithm": 28.51, - "validation": 15.06 + "search_algorithm": 26.668, + "validation": 16.22 }, - "timestamp": "2026-01-27 09:27:22 UTC" + "timestamp": "2026-03-02 14:29:36 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -46486,61 +49937,61 @@ { "name": "time", "unit": "", - "value": 6727.168 + "value": 6530.24 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.286286756302855 + "value": 7.707821922194354 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2098036.0 + "value": 9844.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839868.0 + "value": 1838948.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8388108469615132 + "value": 1.0130702548518211 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2212507.0 + "value": 111867.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103441.0 + "value": 2102817.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7302822342407522 + "value": 1.6606978132871584 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01802073123223261 + "value": 0.019456446298450676 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -46570,13 +50021,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.95285947958709 + "value": 82.03837862664565 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97745868738565 + "value": 99.97036882031011 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -46588,7 +50039,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -46600,7 +50051,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6516375552.0 + "value": 7219970048.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -46612,13 +50063,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1255669760.0 + "value": 551026688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -46630,13 +50081,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 953466880.0 + "value": 952385536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 34.90960636455955 + "value": 43.24505204352288 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -46648,13 +50099,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.60985242617757 + "value": 26.572408397564867 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9703347575262885 + "value": 0.9439173393177949 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -46666,7 +50117,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.41317820901241 + "value": 94.27691035845254 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -46685,21 +50136,38 @@ "time" ], "times": { - "compilation": 13970.205, - "data": 63809.579, - "framework": 3642059.632, - "kernel_overhead": 1739821.306, - "profiling_overhead": 53662.601, - "profiling_runs": 1784766.146, + "compilation": 15645.104, + "data": 61147.31, + "framework": 3665365.4560000002, + "kernel_overhead": 1754918.443, + "profiling_overhead": 51425.715, + "profiling_runs": 1797873.988, "runtimes": [ - 6727.168 + 6530.24 ], - "search_algorithm": 24.276, - "validation": 15.563 + "search_algorithm": 24.347, + "validation": 18.061 }, - "timestamp": "2026-01-27 09:27:24 UTC" + "timestamp": "2026-03-02 14:29:38 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -46716,61 +50184,61 @@ { "name": "time", "unit": "", - "value": 6826.432 + "value": 6327.2 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.218508743315242 + "value": 7.700568860997267 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103644.0 + "value": 9084.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837604.0 + "value": 1837988.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8271114867841498 + "value": 1.0123538243271357 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2212581.0 + "value": 110173.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100454.0 + "value": 2102954.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7302724505348734 + "value": 1.6606006742009345 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01802015670673189 + "value": 0.019456268853208437 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -46800,13 +50268,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.93048812410923 + "value": 81.99679827504048 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98034792144796 + "value": 99.97544770239483 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -46818,7 +50286,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -46830,7 +50298,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6516375552.0 + "value": 7219970048.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -46842,13 +50310,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1255669760.0 + "value": 551026688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -46860,13 +50328,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 953466880.0 + "value": 952385536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 34.90735049434808 + "value": 43.242519753109185 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -46878,13 +50346,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.608356677175845 + "value": 26.5708161534387 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9702757820712643 + "value": 0.9438607788880202 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -46896,7 +50364,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.40786537040695 + "value": 94.2712611958134 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -46915,21 +50383,38 @@ "time" ], "times": { - "compilation": 14057.589, - "data": 60965.751, - "framework": 3635528.9189999998, - "kernel_overhead": 1739340.226, - "profiling_overhead": 50731.041, - "profiling_runs": 1784491.901, + "compilation": 15147.041, + "data": 67363.488, + "framework": 3676673.6229999997, + "kernel_overhead": 1754545.057, + "profiling_overhead": 57488.886, + "profiling_runs": 1797276.192, "runtimes": [ - 6826.432 + 6327.2 ], - "search_algorithm": 25.921, - "validation": 16.825 + "search_algorithm": 31.288, + "validation": 17.75 }, - "timestamp": "2026-01-27 09:27:26 UTC" + "timestamp": "2026-03-02 14:29:40 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", @@ -46946,61 +50431,61 @@ { "name": "time", "unit": "", - "value": 6826.304 + "value": 6288.416 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.270860952541131 + "value": 7.76157417314915 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2111512.0 + "value": 17936.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839932.0 + "value": 1839020.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.832920165123025 + "value": 1.0213104399416653 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2220540.0 + "value": 120962.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102519.0 + "value": 2106542.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7316805758745704 + "value": 1.6630453379794148 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.018035472110185462 + "value": 0.01948480266261858 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -47030,13 +50515,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.91133897550851 + "value": 81.88903025974514 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97206275942136 + "value": 99.9626868328243 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -47048,7 +50533,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -47060,7 +50545,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6516375552.0 + "value": 7219970048.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -47072,13 +50557,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1255669760.0 + "value": 551026688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -47090,13 +50575,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 953466880.0 + "value": 952385536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 34.940146305774924 + "value": 43.31132524956411 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -47108,13 +50593,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.63131256348842 + "value": 26.613180789333935 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9711809030769971 + "value": 0.94536567501174 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -47126,7 +50611,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.48941639208438 + "value": 94.42157964092183 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -47145,21 +50630,38 @@ "time" ], "times": { - "compilation": 14609.396, - "data": 60711.413, - "framework": 3629098.42, - "kernel_overhead": 1736314.973, - "profiling_overhead": 50565.149, - "profiling_runs": 1781506.885, + "compilation": 18601.258, + "data": 61619.465, + "framework": 3665823.42, + "kernel_overhead": 1754853.565, + "profiling_overhead": 51706.383, + "profiling_runs": 1797644.007, "runtimes": [ - 6826.304 + 6288.416 ], - "search_algorithm": 27.523, - "validation": 14.944 + "search_algorithm": 39.993, + "validation": 19.886 }, - "timestamp": "2026-01-27 09:27:28 UTC" + "timestamp": "2026-03-02 14:29:42 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", @@ -47176,61 +50678,61 @@ { "name": "time", "unit": "", - "value": 6314.176 + "value": 5861.824 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.620917115643643 + "value": 8.3040225197671 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2110628.0 + "value": 8888.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869712.0 + "value": 1867752.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9741191754067677 + "value": 1.0644309409637727 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2206861.0 + "value": 105127.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102352.0 + "value": 2101158.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.8713363237882605 + "value": 1.7422209760148286 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019487285212468185 + "value": 0.020411815690473685 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -47260,13 +50762,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.97677782338405 + "value": 82.02231449338082 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95562025722963 + "value": 99.97475547349117 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -47278,7 +50780,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -47290,7 +50792,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5912395776.0 + "value": 5944901632.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -47302,13 +50804,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1087897600.0 + "value": 1054343168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -47320,13 +50822,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 914653184.0 + "value": 913571840.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.798576597845894 + "value": 44.05970765715054 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -47338,13 +50840,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.618453277850723 + "value": 27.875969512573267 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0495312998957256 + "value": 0.9902230381053247 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -47356,7 +50858,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.80427366997401 + "value": 94.98178497720443 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -47375,21 +50877,38 @@ "time" ], "times": { - "compilation": 14541.055, - "data": 60908.078, - "framework": 3558445.183, - "kernel_overhead": 1701914.243, - "profiling_overhead": 50704.265, - "profiling_runs": 1744918.597, + "compilation": 82562.464, + "data": 62606.884, + "framework": 3577884.034, + "kernel_overhead": 1710718.066, + "profiling_overhead": 52459.798, + "profiling_runs": 1752099.286, "runtimes": [ - 6314.176 + 5861.824 ], - "search_algorithm": 36.201, - "validation": 16.132 + "search_algorithm": 31.997, + "validation": 21.409 }, - "timestamp": "2026-01-27 09:27:30 UTC" + "timestamp": "2026-03-02 14:29:44 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 16, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", @@ -47406,61 +50925,61 @@ { "name": "time", "unit": "", - "value": 6357.952 + "value": 5997.312 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.588712747154464 + "value": 8.21437019225386 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2114356.0 + "value": 9120.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1874112.0 + "value": 1868940.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.984095168911022 + "value": 1.0598538800501756 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2213229.0 + "value": 105534.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109054.0 + "value": 2103000.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.871137307941424 + "value": 1.7421734460089777 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01948806218214341 + "value": 0.020411718040420582 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -47490,13 +51009,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.92991810296571 + "value": 81.98496725575745 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97258321622998 + "value": 99.97184252665176 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -47508,7 +51027,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -47520,7 +51039,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5912395776.0 + "value": 5944901632.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -47532,13 +51051,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1087897600.0 + "value": 1054343168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -47550,13 +51069,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 914653184.0 + "value": 913571840.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.793328185657344 + "value": 44.06070527278263 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -47568,13 +51087,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.61499787577113 + "value": 27.87664839102866 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0493950578459565 + "value": 0.9902471535387376 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -47586,7 +51105,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.79248417730727 + "value": 94.98409843116656 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -47605,21 +51124,38 @@ "time" ], "times": { - "compilation": 15085.103, - "data": 59288.787, - "framework": 3552151.649, - "kernel_overhead": 1700497.713, - "profiling_overhead": 49240.643, - "profiling_runs": 1743124.506, + "compilation": 18420.375, + "data": 66845.677, + "framework": 3585667.8719999995, + "kernel_overhead": 1710441.407, + "profiling_overhead": 56500.854, + "profiling_runs": 1751879.934, "runtimes": [ - 6357.952 + 5997.312 ], - "search_algorithm": 27.768, - "validation": 14.187 + "search_algorithm": 36.69, + "validation": 20.282 }, - "timestamp": "2026-01-27 09:27:32 UTC" + "timestamp": "2026-03-02 14:29:46 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 128, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 2, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", @@ -47636,61 +51172,61 @@ { "name": "time", "unit": "", - "value": 6391.488 + "value": 5997.44 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.5061806344139 + "value": 8.045999333758832 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2103472.0 + "value": 2104.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840940.0 + "value": 1837420.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.987970117095943 + "value": 1.0554914148191614 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2210561.0 + "value": 99579.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108914.0 + "value": 2099815.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.8716866838302628 + "value": 1.7422248260009499 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01949132835750345 + "value": 0.020412306667398905 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -47720,13 +51256,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.99590198551324 + "value": 82.03084386557352 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96648609402853 + "value": 99.97377177233805 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -47738,7 +51274,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -47750,7 +51286,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5912395776.0 + "value": 5944901632.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -47762,13 +51298,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1087897600.0 + "value": 1054343168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -47780,13 +51316,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 914653184.0 + "value": 913571840.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.80226677769365 + "value": 44.06146674128025 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -47798,13 +51334,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.621082081862212 + "value": 27.876914323775935 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.049634950249206 + "value": 0.9902566001243649 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -47816,7 +51352,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.81324197760276 + "value": 94.98500392066505 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -47835,21 +51371,38 @@ "time" ], "times": { - "compilation": 16894.381, - "data": 62747.149, - "framework": 3560825.3099999996, - "kernel_overhead": 1701381.413, - "profiling_overhead": 52448.519, - "profiling_runs": 1744248.229, + "compilation": 16285.268, + "data": 61057.751, + "framework": 3575419.1559999995, + "kernel_overhead": 1711148.248, + "profiling_overhead": 51100.351, + "profiling_runs": 1752112.806, "runtimes": [ - 6391.488 + 5997.44 ], - "search_algorithm": 30.75, - "validation": 17.817 + "search_algorithm": 28.463, + "validation": 16.524 }, - "timestamp": "2026-01-27 09:27:34 UTC" + "timestamp": "2026-03-02 14:29:48 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 64, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 4, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", @@ -47866,61 +51419,61 @@ { "name": "time", "unit": "", - "value": 6283.904 + "value": 6017.824 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.464143266536922 + "value": 8.079542089855753 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104164.0 + "value": 10800.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837452.0 + "value": 1840364.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9732914896705638 + "value": 1.066640577830882 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2205340.0 + "value": 110568.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100784.0 + "value": 2109045.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.87170860796226 + "value": 1.7421664050152015 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019493072249688995 + "value": 0.020411509180943345 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -47950,13 +51503,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.95858398375442 + "value": 81.98873966620913 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9738577405486 + "value": 99.97384059039892 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -47968,7 +51521,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -47980,7 +51533,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5912395776.0 + "value": 5944901632.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -47992,13 +51545,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1087897600.0 + "value": 1054343168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -48010,13 +51563,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 914653184.0 + "value": 913571840.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.802973051944555 + "value": 44.05954477633211 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -48028,13 +51581,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.62150077337741 + "value": 27.87580601465629 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0496514587159305 + "value": 0.9902172302569557 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -48046,7 +51599,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.81466927995243 + "value": 94.98122872155051 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -48065,21 +51618,38 @@ "time" ], "times": { - "compilation": 17798.441, - "data": 61568.878, - "framework": 3554976.7630000003, - "kernel_overhead": 1699896.458, - "profiling_overhead": 50687.585, - "profiling_runs": 1742823.842, + "compilation": 16074.796, + "data": 66391.894, + "framework": 3583095.781, + "kernel_overhead": 1709411.805, + "profiling_overhead": 56473.797, + "profiling_runs": 1750818.285, "runtimes": [ - 6283.904 + 6017.824 ], - "search_algorithm": 28.643, - "validation": 17.18 + "search_algorithm": 24.867, + "validation": 19.972 }, - "timestamp": "2026-01-27 09:27:35 UTC" + "timestamp": "2026-03-02 14:29:50 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8, + "y": 32, + "z": 8 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 8, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 48 + }, "configuration": { "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", @@ -48096,61 +51666,61 @@ { "name": "time", "unit": "", - "value": 6304.32 + "value": 6069.92 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.52712655712771 + "value": 8.074889278104397 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2110932.0 + "value": 17748.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839120.0 + "value": 1835244.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9804851011472249 + "value": 1.0699909902100238 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 2212214.0 + "value": 116713.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102329.0 + "value": 2106524.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.8748695419607588 + "value": 1.7438483358716461 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4718592.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019525957176308874 + "value": 0.020430260487816362 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -48180,13 +51750,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.87262847716042 + "value": 81.85695580834701 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9761339149827 + "value": 99.96423935453412 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -48198,7 +51768,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4479516672.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -48210,7 +51780,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5912395776.0 + "value": 5944901632.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -48222,13 +51792,13 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 167772160.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1087897600.0 + "value": 1054343168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -48240,13 +51810,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 914653184.0 + "value": 913571840.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.86762933366204 + "value": 44.10406079865785 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -48258,13 +51828,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.6658042815741 + "value": 27.904094336944908 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0513982889341351 + "value": 0.9912221010804405 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -48276,7 +51846,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.96581936652514 + "value": 95.07762955026169 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -48295,19 +51865,19 @@ "time" ], "times": { - "compilation": 17138.247, - "data": 64072.067, - "framework": 3560020.715, - "kernel_overhead": 1699643.6, - "profiling_overhead": 53929.429, - "profiling_runs": 1742375.619, + "compilation": 15980.912, + "data": 60555.548, + "framework": 3575935.317, + "kernel_overhead": 1711449.768, + "profiling_overhead": 50852.782, + "profiling_runs": 1753077.219, "runtimes": [ - 6304.32 + 6069.92 ], - "search_algorithm": 14.401, - "validation": 19.035 + "search_algorithm": 13.173, + "validation": 21.007 }, - "timestamp": "2026-01-27 09:27:37 UTC" + "timestamp": "2026-03-02 14:29:51 UTC" } ], "schema_version": "1.0.0" diff --git a/Source/Output/JsonT4Converters.cpp b/Source/Output/JsonT4Converters.cpp index e39d95a8..253afaf3 100644 --- a/Source/Output/JsonT4Converters.cpp +++ b/Source/Output/JsonT4Converters.cpp @@ -114,7 +114,7 @@ void to_json(json& j, const as_T4& result) j["measurements"].push_back({{"name","time"}, {"value",time.ConvertFromNanosecondsDouble(result.v.GetTotalDuration())}, {"unit",""}}); const std::vector& compResults = result.v.GetResults(); - if (compResults[0].HasProfilingData()) { + if (!compResults.empty() && compResults[0].HasProfilingData()) { const std::vector& counters = compResults[0].GetProfilingData().GetCounters(); for (const auto& counter : counters) { json j_counter = json::object(); @@ -123,6 +123,29 @@ void to_json(json& j, const as_T4& result) } } + if (!compResults.empty() && compResults[0].HasCompilationData()) { + const KernelCompilationData& compilationData = compResults[0].GetCompilationData(); + const DimensionVector& globalSize = compResults[0].GetGlobalSize(); + const DimensionVector& localSize = compResults[0].GetLocalSize(); + json j_compilationData = json::object(); + j["compilation_data"] = { + {"max_work_group_size", compilationData.m_MaxWorkGroupSize}, + {"local_memory_size", compilationData.m_LocalMemorySize}, + {"private_memory_size", compilationData.m_PrivateMemorySize}, + {"constant_memory_size", compilationData.m_ConstantMemorySize}, + {"registers", compilationData.m_RegistersCount}, + {"global_size", { + {"x", globalSize.GetSizeX()}, + {"y", globalSize.GetSizeY()}, + {"z", globalSize.GetSizeZ()} + }}, + {"local_size", { + {"x", localSize.GetSizeX()}, + {"y", localSize.GetSizeY()}, + {"z", localSize.GetSizeZ()} + }} + }; + } } void from_json(const json& j, as_T4& result) @@ -195,6 +218,50 @@ void from_json(const json& j, as_T4& result) computationResult.SetProfilingData(std::move(uniqueData)); } + if (j.contains("compilation_data")) + { + const auto& compilationDataJson = j["compilation_data"]; + + if (!compilationDataJson.contains("max_work_group_size") || + !compilationDataJson.contains("local_memory_size") || + !compilationDataJson.contains("private_memory_size") || + !compilationDataJson.contains("constant_memory_size") || + !compilationDataJson.contains("registers") || + !compilationDataJson.contains("global_size") || + !compilationDataJson.contains("local_size")) + { + KttError( + "Missing compilation data fields. Required fields: max_work_group_size, local_memory_size, private_memory_size, constant_memory_size, registers, global_size, local_size"); + } + + // Extract compilation data + KernelCompilationData compData; + compData.m_MaxWorkGroupSize = compilationDataJson["max_work_group_size"]; + compData.m_LocalMemorySize = compilationDataJson["local_memory_size"]; + compData.m_PrivateMemorySize = compilationDataJson["private_memory_size"]; + compData.m_ConstantMemorySize = compilationDataJson["constant_memory_size"]; + compData.m_RegistersCount = compilationDataJson["registers"]; + + // Extract global size + const auto& globalSizeJson = compilationDataJson["global_size"]; + if (!globalSizeJson.contains("x") || !globalSizeJson.contains("y") || !globalSizeJson.contains("z")) + { + KttError("Missing global_size dimensions"); + } + DimensionVector globalSize(globalSizeJson["x"], globalSizeJson["y"], globalSizeJson["z"]); + + // Extract local size + const auto& localSizeJson = compilationDataJson["local_size"]; + if (!localSizeJson.contains("x") || !localSizeJson.contains("y") || !localSizeJson.contains("z")) + { + KttError("Missing local_size dimensions"); + } + DimensionVector localSize(localSizeJson["x"], localSizeJson["y"], localSizeJson["z"]); + + computationResult.SetCompilationData(std::make_unique(compData)); + computationResult.SetSizeData(globalSize, localSize); + } + results.push_back(computationResult); result.v = KernelResult(kernelName, configuration, results, timestamp); diff --git a/Tutorials/03KernelTuning/FullSearchSpace.t4.json b/Tutorials/03KernelTuning/FullSearchSpace.t4.json index 1bf626bb..14f100a6 100644 --- a/Tutorials/03KernelTuning/FullSearchSpace.t4.json +++ b/Tutorials/03KernelTuning/FullSearchSpace.t4.json @@ -5,11 +5,28 @@ "compute_api": "CUDA", "device": "NVIDIA RTX 500 Ada Generation Laptop GPU", "platform": "NVIDIA CUDA", - "timestamp": "2026-01-27 09:23:42 UTC", + "timestamp": "2026-03-04 11:13:39 UTC", "timeunit": "microseconds" }, "results": [ { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 32768, + "y": 1, + "z": 1 + }, + "local_memory_size": 0, + "local_size": { + "x": 32, + "y": 1, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 12 + }, "configuration": { "multiply_block_size": "32" }, @@ -19,49 +36,49 @@ { "name": "time", "unit": "", - "value": 92.0 + "value": 94.048 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.432367149758452 + "value": 19.827121284755513 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2188.0 + "value": 2020.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 49300.0 + "value": 50920.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 20.098754245965317 + "value": 19.872445901375592 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 263487.0 + "value": 263537.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 132107.0 + "value": 131582.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.360174376851201 + "value": 6.441561913636126 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -73,7 +90,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.4880152289058584 + "value": 1.4759668916411122 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -103,13 +120,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.119426339496563 + "value": 25.283510567368428 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 41.87776550681886 + "value": 41.14330905218855 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -169,7 +186,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.993584919572935 + "value": 8.070893672901722 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -187,7 +204,7 @@ "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.106469081611187 + "value": 7.174760249686823 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -199,7 +216,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.384947115707838 + "value": 13.513517951817022 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -218,21 +235,38 @@ "time" ], "times": { - "compilation": 33384.951, - "data": 6377.032, - "framework": 40685.69, - "kernel_overhead": 8855.066, - "profiling_overhead": 4024.685, - "profiling_runs": 21428.907, + "compilation": 42500.777, + "data": 7388.856, + "framework": 44869.263, + "kernel_overhead": 9641.577, + "profiling_overhead": 4680.642, + "profiling_runs": 23158.188, "runtimes": [ - 92.0 + 94.048 ], - "search_algorithm": 15.506, - "validation": 11025.241 + "search_algorithm": 23.224, + "validation": 11778.9 }, - "timestamp": "2026-01-27 09:23:41 UTC" + "timestamp": "2026-03-04 11:13:39 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 16384, + "y": 1, + "z": 1 + }, + "local_memory_size": 0, + "local_size": { + "x": 64, + "y": 1, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 12 + }, "configuration": { "multiply_block_size": "64" }, @@ -242,49 +276,49 @@ { "name": "time", "unit": "", - "value": 56.256 + "value": 55.264 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 39.84525529865125 + "value": 36.87621124031008 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3768.0 + "value": 3628.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 49172.0 + "value": 45084.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 39.99961514778325 + "value": 40.13202292869339 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 262719.0 + "value": 262755.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 132098.0 + "value": 131976.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.8597363121381 + "value": 6.886138515029279 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -296,7 +330,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.965129326258334 + "value": 2.9827921439546756 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -326,13 +360,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.58726199497857 + "value": 29.674577523171475 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.80781676427367 + "value": 82.36401843126689 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -392,7 +426,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.154198577983072 + "value": 8.14842874269299 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -410,7 +444,7 @@ "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.249012242441941 + "value": 7.242949532492342 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -422,7 +456,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.696757323564157 + "value": 13.684307375544268 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -441,21 +475,38 @@ "time" ], "times": { - "compilation": 12291.298, - "data": 4883.968, - "framework": 42371.962, - "kernel_overhead": 10481.932, - "profiling_overhead": 3881.842, - "profiling_runs": 23124.22, + "compilation": 13826.961, + "data": 5674.116, + "framework": 41277.788, + "kernel_overhead": 8908.093, + "profiling_overhead": 4539.167, + "profiling_runs": 22156.412, "runtimes": [ - 56.256 + 55.264 ], - "search_algorithm": 11.752, - "validation": 10612.768 + "search_algorithm": 12.698, + "validation": 11471.168 }, - "timestamp": "2026-01-27 09:23:41 UTC" + "timestamp": "2026-03-04 11:13:39 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 8192, + "y": 1, + "z": 1 + }, + "local_memory_size": 0, + "local_size": { + "x": 128, + "y": 1, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 12 + }, "configuration": { "multiply_block_size": "128" }, @@ -465,49 +516,49 @@ { "name": "time", "unit": "", - "value": 50.272 + "value": 51.776 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 79.78093853820599 + "value": 72.11106115107914 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 15436.0 + "value": 9984.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 46040.0 + "value": 41336.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 69.0787996969782 + "value": 74.64229988486942 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 262449.0 + "value": 262484.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 131341.0 + "value": 131563.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.039914043535358 + "value": 13.333441841160818 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -519,7 +570,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.131545978451516 + "value": 5.544200652418143 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -549,13 +600,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.73727897382838 + "value": 62.173972125695464 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 76.4738239163117 + "value": 82.47019619919057 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -615,7 +666,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 15.095695800750002 + "value": 15.123652293351647 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -633,7 +684,7 @@ "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.42039855118836 + "value": 13.445343670644883 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -645,7 +696,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.408111809350725 + "value": 25.455595499002154 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -664,21 +715,38 @@ "time" ], "times": { - "compilation": 13162.126, - "data": 4915.018, - "framework": 39486.164000000004, - "kernel_overhead": 8791.818, - "profiling_overhead": 4019.025, - "profiling_runs": 21760.303, + "compilation": 14160.107, + "data": 5192.749, + "framework": 40546.879, + "kernel_overhead": 9063.23, + "profiling_overhead": 4221.417, + "profiling_runs": 22069.483, "runtimes": [ - 50.272 + 51.776 ], - "search_algorithm": 11.35, - "validation": 10382.171 + "search_algorithm": 15.502, + "validation": 10718.272 }, - "timestamp": "2026-01-27 09:23:41 UTC" + "timestamp": "2026-03-04 11:13:39 UTC" }, { + "compilation_data": { + "constant_memory_size": 0, + "global_size": { + "x": 4096, + "y": 1, + "z": 1 + }, + "local_memory_size": 0, + "local_size": { + "x": 256, + "y": 1, + "z": 1 + }, + "max_work_group_size": 1024, + "private_memory_size": 0, + "registers": 12 + }, "configuration": { "multiply_block_size": "256" }, @@ -688,49 +756,49 @@ { "name": "time", "unit": "", - "value": 48.64 + "value": 53.632 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 77.39057570977917 + "value": 76.65874094202898 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12692.0 + "value": 10196.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 50112.0 + "value": 43968.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 65.45649428343525 + "value": 74.86345335764206 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 262472.0 + "value": 262432.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 131283.0 + "value": 131633.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.011280837664735 + "value": 13.187193515867252 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -742,7 +810,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 4.862413637550749 + "value": 5.561288220279151 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -772,13 +840,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 62.664028283494076 + "value": 63.05144087367304 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 73.746557373157 + "value": 84.22369725194156 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -838,7 +906,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.840054247546686 + "value": 14.854526714350053 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -856,7 +924,7 @@ "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.186822031425748 + "value": 13.20599404142389 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -868,7 +936,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.98093488483463 + "value": 25.01725407775569 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -887,19 +955,19 @@ "time" ], "times": { - "compilation": 12729.014, - "data": 4813.984, - "framework": 38633.167, - "kernel_overhead": 8850.544, - "profiling_overhead": 3927.906, - "profiling_runs": 21040.733, + "compilation": 13681.31, + "data": 5773.082, + "framework": 41804.396, + "kernel_overhead": 9070.811, + "profiling_overhead": 4826.182, + "profiling_runs": 22134.321, "runtimes": [ - 48.64 + 53.632 ], - "search_algorithm": 4.332, - "validation": 9586.204 + "search_algorithm": 8.022, + "validation": 12624.658 }, - "timestamp": "2026-01-27 09:23:42 UTC" + "timestamp": "2026-03-04 11:13:39 UTC" } ], "schema_version": "1.0.0" From c5349d523761f14dcdd7894ad408b2eafb58f60b Mon Sep 17 00:00:00 2001 From: Jana Hozzova Date: Thu, 5 Mar 2026 10:12:33 +0100 Subject: [PATCH 2/3] Fix naming bug to comply with T4 results schema --- ...oulomb_rtx500ada_full_search_space.t4.json | 16632 ++++++++-------- Source/Output/JsonT4Converters.cpp | 4 +- .../03KernelTuning/FullSearchSpace.t4.json | 194 +- 3 files changed, 8415 insertions(+), 8415 deletions(-) diff --git a/Examples/CoulombSum3d/coulomb_rtx500ada_full_search_space.t4.json b/Examples/CoulombSum3d/coulomb_rtx500ada_full_search_space.t4.json index 390dfc84..5e6bdf2b 100644 --- a/Examples/CoulombSum3d/coulomb_rtx500ada_full_search_space.t4.json +++ b/Examples/CoulombSum3d/coulomb_rtx500ada_full_search_space.t4.json @@ -5,7 +5,7 @@ "compute_api": "CUDA", "device": "NVIDIA RTX 500 Ada Generation Laptop GPU", "platform": "NVIDIA CUDA", - "timestamp": "2026-03-02 14:29:51 UTC", + "timestamp": "2026-03-05 09:00:49 UTC", "timeunit": "microseconds" }, "results": [ @@ -43,49 +43,49 @@ { "name": "time", "unit": "", - "value": 4624.704 + "value": 4164.159 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.411948636927773 + "value": 15.08856462977461 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 232.0 + "value": 4716.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868228.0 + "value": 1870148.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.6188741888304559 + "value": 1.6511836629193575 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 64674.0 + "value": 67640.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099214.0 + "value": 2099965.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.494926321081408 + "value": 22.496709444370346 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -97,7 +97,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0542418047027353 + "value": 1.0542384694410099 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -127,13 +127,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.42613609541247 + "value": 60.34603258600442 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95692785096995 + "value": 99.94811431437259 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -193,7 +193,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.990894664469515 + "value": 39.99411082917537 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -205,13 +205,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.000366292150126 + "value": 45.00419209644674 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.953222296352378 + "value": 11.954238525618663 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -223,7 +223,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.21795747517453 + "value": 57.22274318740528 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -242,37 +242,37 @@ "time" ], "times": { - "compilation": 38667.992, - "data": 65390.136, - "framework": 276837.773, - "kernel_overhead": 60081.448, - "profiling_overhead": 54665.666, - "profiling_runs": 96700.523, + "compilation_time": 14958.313, + "data": 79109.886, + "framework": 298309.184, + "kernel_overhead": 59740.158, + "profiling_overhead": 65245.883, + "profiling_runs": 94213.257, "runtimes": [ - 4624.704 + 4164.159 ], - "search_algorithm": 23.173, - "validation": 16.077 + "search_algorithm": 44.101, + "validation": 21.809 }, - "timestamp": "2026-03-02 14:27:8 UTC" + "timestamp": "2026-03-05 08:58:2 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 32, - "z": 256 + "y": 64, + "z": 128 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 44 + "registers": 31 }, "configuration": { "INNER_UNROLL_FACTOR": "0", @@ -280,9 +280,9 @@ "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -290,61 +290,61 @@ { "name": "time", "unit": "", - "value": 4111.36 + "value": 2065.599 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.747035144386057 + "value": 31.19627659574468 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 828.0 + "value": 5716.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870548.0 + "value": 1871052.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.6464863800751055 + "value": 2.9417972854982906 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 64999.0 + "value": 40336.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099246.0 + "value": 2099148.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.508424333784806 + "value": 19.914180329323386 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0546425853891812 + "value": 0.9329897641046083 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -374,13 +374,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 71.46997356111453 + "value": 95.77744355125036 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94899829123742 + "value": 99.9367232322082 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -392,7 +392,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8589934592.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -404,43 +404,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 587202560.0 + "value": 704643072.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 117440512.0 + "value": 142606336.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 234881024.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 341311488.0 + "value": 256901120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.00891329141059 + "value": 50.492465935580846 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -452,13 +452,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.02104514559877 + "value": 79.66553631334968 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.958715116799674 + "value": 10.736175792228764 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -470,7 +470,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.24425298156683 + "value": 76.2432473827244 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -489,47 +489,47 @@ "time" ], "times": { - "compilation": 14047.967, - "data": 66698.358, - "framework": 266112.539, - "kernel_overhead": 54507.466, - "profiling_overhead": 57011.748, - "profiling_runs": 87894.967, + "compilation_time": 28842.744, + "data": 79997.846, + "framework": 294033.778, + "kernel_overhead": 62610.538, + "profiling_overhead": 65758.4, + "profiling_runs": 85666.994, "runtimes": [ - 4111.36 + 2065.599 ], - "search_algorithm": 34.088, - "validation": 17.611 + "search_algorithm": 43.138, + "validation": 27.005 }, - "timestamp": "2026-03-02 14:27:8 UTC" + "timestamp": "2026-03-05 08:58:3 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 256 + "x": 16, + "y": 64, + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 44 + "registers": 39 }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -537,61 +537,61 @@ { "name": "time", "unit": "", - "value": 3825.376 + "value": 2062.048 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.980192345577304 + "value": 34.85724296278626 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8540.0 + "value": 24.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1846884.0 + "value": 1870332.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.6980193787316176 + "value": 3.3292094087218165 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 72050.0 + "value": 31981.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2135034.0 + "value": 2098881.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.559228035495185 + "value": 11.292560514913129 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0571974309534597 + "value": 0.5290514380679806 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -621,13 +621,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.38176541052634 + "value": 96.98345631231332 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95697546885552 + "value": 99.91489036218367 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -639,7 +639,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8589934592.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -651,43 +651,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 587202560.0 + "value": 390070272.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 117440512.0 + "value": 71303168.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 234881024.0 + "value": 88080384.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 341311488.0 + "value": 197394432.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.10313416890375 + "value": 45.84691858176103 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -699,13 +699,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.12650585491358 + "value": 90.36835761213294 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.98672811771142 + "value": 6.265774795372499 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -717,7 +717,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.378335836811935 + "value": 66.45370302261419 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -736,47 +736,47 @@ "time" ], "times": { - "compilation": 13892.782, - "data": 66846.285, - "framework": 254133.20500000002, - "kernel_overhead": 50680.543, - "profiling_overhead": 55639.911, - "profiling_runs": 80966.466, + "compilation_time": 24253.315, + "data": 78566.579, + "framework": 240769.07400000002, + "kernel_overhead": 37803.868, + "profiling_overhead": 64569.858, + "profiling_runs": 59828.769, "runtimes": [ - 3825.376 + 2062.048 ], - "search_algorithm": 23.45, - "validation": 13.532 + "search_algorithm": 34.974, + "validation": 25.508 }, - "timestamp": "2026-03-02 14:27:9 UTC" + "timestamp": "2026-03-05 08:58:3 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 64, - "z": 256 + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 44 + "registers": 40 }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -784,61 +784,61 @@ { "name": "time", "unit": "", - "value": 3746.176 + "value": 1785.216 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.059047490312697 + "value": 36.2106897790329 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12600.0 + "value": 424.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842328.0 + "value": 1870620.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.687278699365753 + "value": 3.4349717901443535 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 75401.0 + "value": 32211.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105665.0 + "value": 2099175.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.571437825600857 + "value": 5.798133949028227 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0577408875914127 + "value": 0.27158949142281796 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -868,13 +868,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 70.6814225076394 + "value": 98.54738722612807 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9560947457864 + "value": 99.91037980534982 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -886,7 +886,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8589934592.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -898,43 +898,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 587202560.0 + "value": 362807296.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 117440512.0 + "value": 69206016.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 234881024.0 + "value": 127926272.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 341311488.0 + "value": 173801472.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.124476280278614 + "value": 42.63220670537569 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -946,13 +946,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.150101137248946 + "value": 92.78570113161689 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.992995614581751 + "value": 3.3979138598004237 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -964,7 +964,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.408348162568586 + "value": 60.07610249989111 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -983,47 +983,47 @@ "time" ], "times": { - "compilation": 14499.703, - "data": 69820.855, - "framework": 258114.99899999998, - "kernel_overhead": 50014.144, - "profiling_overhead": 58158.457, - "profiling_runs": 80121.543, + "compilation_time": 23739.478, + "data": 77263.467, + "framework": 226799.34200000003, + "kernel_overhead": 32201.116, + "profiling_overhead": 64149.56, + "profiling_runs": 53185.199, "runtimes": [ - 3746.176 + 1785.216 ], - "search_algorithm": 23.333, - "validation": 17.556 + "search_algorithm": 32.723, + "validation": 28.135 }, - "timestamp": "2026-03-02 14:27:9 UTC" + "timestamp": "2026-03-05 08:58:3 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 32, - "z": 256 + "x": 16, + "y": 64, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 8, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 44 + "registers": 40 }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -1031,61 +1031,61 @@ { "name": "time", "unit": "", - "value": 3683.744 + "value": 1749.376 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.150522171212039 + "value": 37.06801098400486 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8948.0 + "value": 3308.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838976.0 + "value": 1871420.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.6826861930656676 + "value": 3.5387505637651557 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 69034.0 + "value": 33523.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100456.0 + "value": 2100375.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.5516869656456 + "value": 2.9960790924695573 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0565964972188082 + "value": 0.14035228789763055 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -1115,13 +1115,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 72.23711013846311 + "value": 98.3774365188951 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94477798296198 + "value": 99.92256523959945 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -1133,7 +1133,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8589934592.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -1145,43 +1145,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 587202560.0 + "value": 209715200.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 117440512.0 + "value": 67108864.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 234881024.0 + "value": 160432128.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 341311488.0 + "value": 163381248.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.08458039205776 + "value": 39.53929291667869 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -1193,13 +1193,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.10635918935927 + "value": 95.88807924252993 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.981376659673556 + "value": 1.9430445744946252 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -1211,7 +1211,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.35271112518268 + "value": 58.362620819453305 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -1230,19 +1230,19 @@ "time" ], "times": { - "compilation": 14510.8, - "data": 64151.459, - "framework": 250951.25100000002, - "kernel_overhead": 52053.758, - "profiling_overhead": 52741.051, - "profiling_runs": 82004.983, + "compilation_time": 23699.458, + "data": 77649.478, + "framework": 232715.30500000002, + "kernel_overhead": 34816.117, + "profiling_overhead": 64360.759, + "profiling_runs": 55888.951, "runtimes": [ - 3683.744 + 1749.376 ], - "search_algorithm": 23.625, - "validation": 13.158 + "search_algorithm": 34.358, + "validation": 28.966 }, - "timestamp": "2026-03-02 14:27:9 UTC" + "timestamp": "2026-03-05 08:58:3 UTC" }, { "compilation_data": { @@ -1250,7 +1250,7 @@ "global_size": { "x": 16, "y": 64, - "z": 128 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -1260,7 +1260,7 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 31 + "registers": 48 }, "configuration": { "INNER_UNROLL_FACTOR": "0", @@ -1270,7 +1270,7 @@ "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -1278,61 +1278,61 @@ { "name": "time", "unit": "", - "value": 2084.736 + "value": 1761.088 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 23.641546139908108 + "value": 36.907264989644894 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 488.0 + "value": 3820.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870016.0 + "value": 1866652.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.9409661601340997 + "value": 3.5314718538865773 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 36048.0 + "value": 32563.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099100.0 + "value": 2101849.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.91120337137657 + "value": 1.4946475753164037 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9328369539472485 + "value": 0.07002164243147418 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -1362,13 +1362,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.16141159052735 + "value": 81.7369421065671 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93681929633743 + "value": 99.93557808028555 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -1380,7 +1380,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6442450944.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -1392,43 +1392,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 704643072.0 + "value": 138412032.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 142606336.0 + "value": 33554432.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 82837504.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 256901120.0 + "value": 150192128.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.482728224246095 + "value": 37.940535645157915 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -1440,13 +1440,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.65241169102248 + "value": 95.6645113811566 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.73440704429795 + "value": 1.1561018831463015 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -1458,7 +1458,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.23083997925477 + "value": 53.526208355551276 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -1477,45 +1477,45 @@ "time" ], "times": { - "compilation": 52783.613, - "data": 66038.952, - "framework": 263544.398, - "kernel_overhead": 60380.561, - "profiling_overhead": 54869.582, - "profiling_runs": 82255.303, + "compilation_time": 23685.291, + "data": 78190.631, + "framework": 214281.375, + "kernel_overhead": 25009.061, + "profiling_overhead": 65057.868, + "profiling_runs": 46023.815, "runtimes": [ - 2084.736 + 1761.088 ], - "search_algorithm": 24.112, - "validation": 15.49 + "search_algorithm": 34.246, + "validation": 26.896 }, - "timestamp": "2026-03-02 14:27:9 UTC" + "timestamp": "2026-03-05 08:58:3 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 32, + "y": 64, "z": 128 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 31 + "registers": 19 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "2" }, @@ -1525,49 +1525,49 @@ { "name": "time", "unit": "", - "value": 2124.64 + "value": 5036.256 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 23.462335694639886 + "value": 12.502511456516535 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6816.0 + "value": 932.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871252.0 + "value": 1870868.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.9374073118834225 + "value": 1.2087752958969935 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 42415.0 + "value": 86644.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100271.0 + "value": 2099613.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.911763669148296 + "value": 7.955361178183637 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -1579,7 +1579,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9328470453106084 + "value": 0.37280262557656435 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -1609,13 +1609,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.35967604961488 + "value": 97.65932195477647 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93735268875814 + "value": 99.96304194450488 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -1627,7 +1627,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6442450944.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -1639,13 +1639,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 704643072.0 + "value": 8212447232.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 142606336.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -1657,7 +1657,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 3321888768.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -1669,13 +1669,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 256901120.0 + "value": 715390976.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.48388025347601 + "value": 36.26869727517785 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -1687,13 +1687,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.65284823425156 + "value": 31.824252340707808 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.734465875319058 + "value": 4.288815256853201 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -1705,7 +1705,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.23131236852525 + "value": 84.8132707652535 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -1724,47 +1724,47 @@ "time" ], "times": { - "compilation": 14412.918, - "data": 66523.767, - "framework": 264283.49100000004, - "kernel_overhead": 60050.037, - "profiling_overhead": 55476.802, - "profiling_runs": 82232.885, + "compilation_time": 38163.739, + "data": 78482.367, + "framework": 2055944.9449999998, + "kernel_overhead": 937215.515, + "profiling_overhead": 65372.435, + "profiling_runs": 974874.628, "runtimes": [ - 2124.64 + 5036.256 ], - "search_algorithm": 22.515, - "validation": 18.746 + "search_algorithm": 51.765, + "validation": 25.886 }, - "timestamp": "2026-03-02 14:27:9 UTC" + "timestamp": "2026-03-05 08:58:4 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 128 + "x": 16, + "y": 64, + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 31 + "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -1772,61 +1772,61 @@ { "name": "time", "unit": "", - "value": 2127.392 + "value": 5652.48 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 22.94248148089273 + "value": 11.394571769264015 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6820.0 + "value": 13952.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840560.0 + "value": 1875804.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.9203920145111426 + "value": 1.1145089532655934 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 42596.0 + "value": 110014.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100374.0 + "value": 2108825.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.907779410784542 + "value": 3.6247277006174636 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9327720734660485 + "value": 0.16985528331244065 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -1856,13 +1856,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.8063099416199 + "value": 98.6001940199266 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93537103793192 + "value": 99.95914913395472 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -1874,7 +1874,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6442450944.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -1886,43 +1886,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 704643072.0 + "value": 10049552384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 142606336.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 256901120.0 + "value": 786956288.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.479649516642766 + "value": 33.82916869106652 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -1934,13 +1934,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.6480259615895 + "value": 29.00048196669724 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.733815998729836 + "value": 2.010775605112797 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -1952,7 +1952,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.22664145761797 + "value": 85.01932998461622 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -1971,47 +1971,47 @@ "time" ], "times": { - "compilation": 14353.455, - "data": 69155.819, - "framework": 268261.904, - "kernel_overhead": 59251.505, - "profiling_overhead": 58471.272, - "profiling_runs": 81383.308, + "compilation_time": 24541.223, + "data": 79995.385, + "framework": 2633698.904, + "kernel_overhead": 1223463.404, + "profiling_overhead": 66650.71, + "profiling_runs": 1263589.405, "runtimes": [ - 2127.392 + 5652.48 ], - "search_algorithm": 27.037, - "validation": 15.405 + "search_algorithm": 41.204, + "validation": 24.811 }, - "timestamp": "2026-03-02 14:27:10 UTC" + "timestamp": "2026-03-05 08:58:6 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 64, - "z": 128 + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 31 + "private_memory_size": 32, + "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -2019,61 +2019,61 @@ { "name": "time", "unit": "", - "value": 2164.768 + "value": 8013.44 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 23.249493265068804 + "value": 8.189313221389154 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2928.0 + "value": 16332.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839652.0 + "value": 1932604.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.948621098973287 + "value": 48.77089332021168 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 39384.0 + "value": 968608.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103158.0 + "value": 138418932.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.908356288799332 + "value": 1.268815657925955 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9326777340553464 + "value": 0.05929962738909261 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -2091,25 +2091,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.08927620238273 + "value": 90.91210565187163 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92497195202075 + "value": 99.38980851415344 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -2121,7 +2121,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6442450944.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -2133,43 +2133,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 704643072.0 + "value": 3982491648.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 142606336.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 155189248.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 256901120.0 + "value": 395247616.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.48051672525135 + "value": 10.650280349933606 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -2181,13 +2181,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.64825850632965 + "value": 20.365206236674894 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.733847337767083 + "value": 11.027838728746318 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -2199,7 +2199,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.2269197951188 + "value": 29.98622427975115 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -2218,47 +2218,47 @@ "time" ], "times": { - "compilation": 14089.717, - "data": 68407.585, - "framework": 268120.423, - "kernel_overhead": 59431.985, - "profiling_overhead": 58664.217, - "profiling_runs": 81616.636, + "compilation_time": 23858.124, + "data": 78159.021, + "framework": 918072.5819999999, + "kernel_overhead": 361327.816, + "profiling_overhead": 65112.2, + "profiling_runs": 413473.545, "runtimes": [ - 2164.768 + 8013.44 ], - "search_algorithm": 21.14, - "validation": 14.749 + "search_algorithm": 40.389, + "validation": 24.587 }, - "timestamp": "2026-03-02 14:27:10 UTC" + "timestamp": "2026-03-05 08:58:6 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 32, - "z": 128 + "x": 16, + "y": 64, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 8, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 31 + "private_memory_size": 64, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -2266,61 +2266,61 @@ { "name": "time", "unit": "", - "value": 2097.536 + "value": 8059.2 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 22.793559107453714 + "value": 8.554184563722444 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4904.0 + "value": 19976.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838712.0 + "value": 2024444.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.908916147115297 + "value": 54.24017876637235 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 40608.0 + "value": 17152204.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100206.0 + "value": 138420548.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.82995532850941 + "value": 0.6326617969800049 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9287484315607809 + "value": 0.029527060969872645 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -2338,25 +2338,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.89653215951564 + "value": 88.25322446437328 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89606359192587 + "value": 99.39824811511478 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -2368,7 +2368,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6442450944.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -2380,43 +2380,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 704643072.0 + "value": 3635412992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 142606336.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 2252341248.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 77594624.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 256901120.0 + "value": 367034368.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.28373780185112 + "value": 9.164085373791956 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -2428,13 +2428,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.33565812656536 + "value": 20.27917057997045 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.69171955221291 + "value": 10.649535136112412 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -2446,7 +2446,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.927812963877 + "value": 27.72815965897177 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -2465,19 +2465,19 @@ "time" ], "times": { - "compilation": 14579.325, - "data": 69786.15, - "framework": 270141.891, - "kernel_overhead": 59620.219, - "profiling_overhead": 58990.346, - "profiling_runs": 81745.176, + "compilation_time": 23165.279, + "data": 77944.504, + "framework": 882047.1429999999, + "kernel_overhead": 343819.281, + "profiling_overhead": 64384.98, + "profiling_runs": 395898.378, "runtimes": [ - 2097.536 + 8059.2 ], - "search_algorithm": 22.796, - "validation": 13.722 + "search_algorithm": 42.603, + "validation": 29.839 }, - "timestamp": "2026-03-02 14:27:10 UTC" + "timestamp": "2026-03-05 08:58:7 UTC" }, { "compilation_data": { @@ -2485,7 +2485,7 @@ "global_size": { "x": 16, "y": 64, - "z": 64 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -2494,18 +2494,18 @@ "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 39 + "private_memory_size": 128, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -2513,61 +2513,61 @@ { "name": "time", "unit": "", - "value": 1824.16 + "value": 9279.392 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 26.938519246308672 + "value": 8.287981037043913 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4656.0 + "value": 844.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872920.0 + "value": 2284464.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3410455860439634 + "value": 75.05927508481193 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 39998.0 + "value": 110113746.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104082.0 + "value": 138417394.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.290663649221761 + "value": 0.277256639031899 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5288503283203805 + "value": 0.01279526175463149 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -2585,25 +2585,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.07624468088186 + "value": 92.05232653461279 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90168753684452 + "value": 98.90666320961353 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -2615,7 +2615,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5368709120.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -2627,43 +2627,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 390070272.0 + "value": 3461873664.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 71303168.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 2218786816.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 88080384.0 + "value": 38797312.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 197394432.0 + "value": 352927744.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.83409883610383 + "value": 7.3551326086856275 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -2675,13 +2675,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.34594402294164 + "value": 17.662912502972294 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.26422072815318 + "value": 9.131156549082966 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -2693,7 +2693,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.4374039509074 + "value": 23.2226782307264 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -2712,45 +2712,45 @@ "time" ], "times": { - "compilation": 56995.722, - "data": 62746.945, - "framework": 205096.807, - "kernel_overhead": 34237.658, - "profiling_overhead": 53080.389, - "profiling_runs": 55031.815, + "compilation_time": 24339.71, + "data": 79323.557, + "framework": 874243.786, + "kernel_overhead": 335047.331, + "profiling_overhead": 65887.142, + "profiling_runs": 393985.756, "runtimes": [ - 1824.16 + 9279.392 ], - "search_algorithm": 25.066, - "validation": 14.408 + "search_algorithm": 43.566, + "validation": 30.712 }, - "timestamp": "2026-03-02 14:27:10 UTC" + "timestamp": "2026-03-05 08:58:7 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 32, + "y": 64, "z": 64 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 39 + "registers": 23 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "4" }, @@ -2760,49 +2760,49 @@ { "name": "time", "unit": "", - "value": 1852.864 + "value": 3366.56 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 26.710777654181406 + "value": 18.81636007159966 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2408.0 + "value": 152.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872492.0 + "value": 1870140.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3234204776513065 + "value": 1.807543781439878 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 35972.0 + "value": 58473.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103006.0 + "value": 2098982.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.290177831907842 + "value": 6.0603114780106395 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -2814,7 +2814,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5288589205992874 + "value": 0.2839598393713506 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -2844,13 +2844,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.9153412671905 + "value": 98.32896806220486 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90543601809861 + "value": 99.94940131107272 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -2862,7 +2862,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5368709120.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -2874,13 +2874,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 390070272.0 + "value": 4945084416.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 71303168.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -2892,7 +2892,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 88080384.0 + "value": 1665138688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -2904,13 +2904,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 197394432.0 + "value": 484966400.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.83302684144656 + "value": 41.3840702965069 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -2922,13 +2922,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.34402202024393 + "value": 48.48701304561159 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.264087464294256 + "value": 3.36189250609221 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -2940,7 +2940,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.43604341357506 + "value": 87.59917436439628 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -2959,47 +2959,47 @@ "time" ], "times": { - "compilation": 14491.392, - "data": 70302.524, - "framework": 219037.966, - "kernel_overhead": 34383.432, - "profiling_overhead": 59097.552, - "profiling_runs": 55254.458, + "compilation_time": 28215.263, + "data": 79211.125, + "framework": 1355583.8590000002, + "kernel_overhead": 591129.496, + "profiling_overhead": 65506.276, + "profiling_runs": 619736.962, "runtimes": [ - 1852.864 + 3366.56 ], - "search_algorithm": 22.711, - "validation": 16.437 + "search_algorithm": 43.941, + "validation": 24.311 }, - "timestamp": "2026-03-02 14:27:10 UTC" + "timestamp": "2026-03-05 08:58:8 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 64 + "x": 16, + "y": 64, + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 39 + "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -3007,61 +3007,61 @@ { "name": "time", "unit": "", - "value": 1853.664 + "value": 4003.552 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 26.093433062880322 + "value": 15.807840367486897 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4344.0 + "value": 6500.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839848.0 + "value": 1869964.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.327966971941587 + "value": 1.530953336101577 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 38830.0 + "value": 74673.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102725.0 + "value": 2100638.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.290329179956203 + "value": 2.539996258228092 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5289277258012499 + "value": 0.11899256640229201 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -3091,13 +3091,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.63553138997261 + "value": 98.95186650920881 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92688900760733 + "value": 99.93525412485533 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -3109,7 +3109,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5368709120.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -3121,43 +3121,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 390070272.0 + "value": 5463080960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 71303168.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 88080384.0 + "value": 1369440256.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 197394432.0 + "value": 596246528.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.828736498274765 + "value": 40.969446126094475 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -3169,13 +3169,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.33637769230242 + "value": 40.64244363779581 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.263557437649875 + "value": 1.4883707386888112 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -3187,7 +3187,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.43036010307041 + "value": 90.27510468850242 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -3206,47 +3206,47 @@ "time" ], "times": { - "compilation": 14138.907, - "data": 69745.293, - "framework": 217301.769, - "kernel_overhead": 34265.859, - "profiling_overhead": 58034.545, - "profiling_runs": 55256.072, + "compilation_time": 23089.798, + "data": 79263.028, + "framework": 1922833.412, + "kernel_overhead": 873048.944, + "profiling_overhead": 65324.961, + "profiling_runs": 905196.479, "runtimes": [ - 1853.664 + 4003.552 ], - "search_algorithm": 23.834, - "validation": 16.918 + "search_algorithm": 39.768, + "validation": 26.124 }, - "timestamp": "2026-03-02 14:27:10 UTC" + "timestamp": "2026-03-05 08:58:9 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 64, - "z": 64 + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 39 + "private_memory_size": 64, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -3254,61 +3254,61 @@ { "name": "time", "unit": "", - "value": 1845.504 + "value": 8081.152 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 25.978005713152868 + "value": 8.473052487668882 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5592.0 + "value": 4356.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839420.0 + "value": 2018548.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3128921378104135 + "value": 56.79576867244131 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 37236.0 + "value": 24425825.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100914.0 + "value": 138414879.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.290097296949842 + "value": 0.6333221065974899 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5288867478425473 + "value": 0.029555182240722004 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -3326,25 +3326,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.3149146889141 + "value": 90.36712736096173 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9047134696432 + "value": 99.55225886207498 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -3356,7 +3356,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5368709120.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -3368,43 +3368,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 390070272.0 + "value": 1958739968.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 71303168.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 1178599424.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 88080384.0 + "value": 143654912.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 197394432.0 + "value": 266371072.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.83816370232567 + "value": 9.000554932119748 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -3416,13 +3416,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.34942913468103 + "value": 20.267081805700595 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.2644623716429235 + "value": 5.576416307379046 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -3434,7 +3434,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.44001962895105 + "value": 20.111467635614066 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -3453,47 +3453,47 @@ "time" ], "times": { - "compilation": 14566.317, - "data": 67501.197, - "framework": 213170.279, - "kernel_overhead": 34593.433, - "profiling_overhead": 55577.162, - "profiling_runs": 55498.487, + "compilation_time": 23188.755, + "data": 78986.207, + "framework": 577515.828, + "kernel_overhead": 190363.108, + "profiling_overhead": 65587.288, + "profiling_runs": 242579.225, "runtimes": [ - 1845.504 + 8081.152 ], - "search_algorithm": 21.079, - "validation": 16.365 + "search_algorithm": 44.989, + "validation": 32.889 }, - "timestamp": "2026-03-02 14:27:11 UTC" + "timestamp": "2026-03-05 08:58:9 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 32, - "z": 64 + "x": 16, + "y": 64, + "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 8, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 39 + "private_memory_size": 128, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -3501,61 +3501,61 @@ { "name": "time", "unit": "", - "value": 1860.0 + "value": 9321.728 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 26.281960486877697 + "value": 8.343425334472226 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1564.0 + "value": 28472.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839132.0 + "value": 2272060.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.337799850239394 + "value": 81.93752652102069 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 31017.0 + "value": 133018806.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099038.0 + "value": 138426928.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.381127374713866 + "value": 0.2752852369893251 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5330927590294268 + "value": 0.012788896839625048 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -3573,25 +3573,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.51491572048003 + "value": 98.88763212485345 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.87689745914359 + "value": 99.38350626080404 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -3603,7 +3603,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5368709120.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -3615,43 +3615,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 390070272.0 + "value": 1817706496.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 71303168.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 1145044992.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 88080384.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 197394432.0 + "value": 252264448.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 46.21419104561163 + "value": 7.247180761909658 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -3663,13 +3663,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.09330237746549 + "value": 17.569421535480583 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.316039520312549 + "value": 4.6904693479121145 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -3681,7 +3681,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.98702750233574 + "value": 16.51125274818198 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -3700,19 +3700,19 @@ "time" ], "times": { - "compilation": 14879.035, - "data": 68881.508, - "framework": 216159.36200000002, - "kernel_overhead": 34799.521, - "profiling_overhead": 56901.539, - "profiling_runs": 55576.794, + "compilation_time": 23421.374, + "data": 76565.916, + "framework": 558501.9369999999, + "kernel_overhead": 179959.425, + "profiling_overhead": 63211.784, + "profiling_runs": 238764.812, "runtimes": [ - 1860.0 + 9321.728 ], - "search_algorithm": 25.458, - "validation": 15.284 + "search_algorithm": 42.084, + "validation": 27.281 }, - "timestamp": "2026-03-02 14:27:11 UTC" + "timestamp": "2026-03-05 08:58:10 UTC" }, { "compilation_data": { @@ -3730,10 +3730,10 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", @@ -3748,49 +3748,49 @@ { "name": "time", "unit": "", - "value": 1774.4 + "value": 2560.96 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.69779785137747 + "value": 25.25174237736757 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 284.0 + "value": 7472.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870864.0 + "value": 1873100.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4109896500830255 + "value": 2.423103083944468 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 31284.0 + "value": 52242.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2097371.0 + "value": 2100326.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.796891131757365 + "value": 4.067482104424384 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -3802,7 +3802,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2715628421676894 + "value": 0.19059009488325 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -3832,13 +3832,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.42110015383683 + "value": 98.68732069440172 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92850632480865 + "value": 99.95141337840718 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -3850,7 +3850,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4831838208.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -3862,13 +3862,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 362807296.0 + "value": 2776629248.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 69206016.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -3880,7 +3880,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 127926272.0 + "value": 1642070016.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -3892,13 +3892,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 173801472.0 + "value": 378208256.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.618593265813004 + "value": 45.25513562540067 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -3910,13 +3910,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.75976749345133 + "value": 65.08637565786536 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3969641416058836 + "value": 2.3835342648144056 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -3928,7 +3928,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.0594926242331 + "value": 91.7031973689859 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -3947,47 +3947,47 @@ "time" ], "times": { - "compilation": 49228.741, - "data": 70579.426, - "framework": 209115.252, - "kernel_overhead": 29145.654, - "profiling_overhead": 59171.295, - "profiling_runs": 50218.877, + "compilation_time": 23739.516, + "data": 78451.291, + "framework": 1085704.6770000001, + "kernel_overhead": 458857.706, + "profiling_overhead": 64762.605, + "profiling_runs": 483633.075, "runtimes": [ - 1774.4 + 2560.96 ], - "search_algorithm": 23.047, - "validation": 19.782 + "search_algorithm": 46.072, + "validation": 26.249 }, - "timestamp": "2026-03-02 14:27:11 UTC" + "timestamp": "2026-03-05 08:58:10 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 32, - "z": 32 + "y": 64, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -3995,61 +3995,61 @@ { "name": "time", "unit": "", - "value": 1785.664 + "value": 3200.48 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.526361521362368 + "value": 19.788279958817633 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 188.0 + "value": 440.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870160.0 + "value": 1869296.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.416315067902632 + "value": 1.8990258327535976 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 30981.0 + "value": 55519.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098988.0 + "value": 2099159.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.796986553928564 + "value": 1.5936383952174313 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2715609517097533 + "value": 0.07467841425540916 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -4079,13 +4079,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.32328218533814 + "value": 98.69534744623671 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92587765038782 + "value": 99.96338152799524 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -4097,7 +4097,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4831838208.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -4109,43 +4109,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 362807296.0 + "value": 3234856960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 69206016.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 127926272.0 + "value": 1294991360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 173801472.0 + "value": 500957184.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.62084937854922 + "value": 45.37721957899672 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -4157,13 +4157,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.76156189947932 + "value": 50.999139237216916 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3970298547172604 + "value": 1.0334298234104013 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -4175,7 +4175,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.06071803833911 + "value": 95.17578619280563 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -4194,47 +4194,47 @@ "time" ], "times": { - "compilation": 14236.209, - "data": 68327.844, - "framework": 203635.83599999998, - "kernel_overhead": 28846.493, - "profiling_overhead": 56972.346, - "profiling_runs": 49489.153, + "compilation_time": 23327.97, + "data": 77236.169, + "framework": 1822333.866, + "kernel_overhead": 826526.159, + "profiling_overhead": 64050.318, + "profiling_runs": 854521.22, "runtimes": [ - 1785.664 + 3200.48 ], - "search_algorithm": 21.49, - "validation": 15.179 + "search_algorithm": 55.23, + "validation": 26.358 }, - "timestamp": "2026-03-02 14:27:11 UTC" + "timestamp": "2026-03-05 08:58:11 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 32 + "x": 16, + "y": 64, + "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -4242,61 +4242,61 @@ { "name": "time", "unit": "", - "value": 1828.48 + "value": 5331.424 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 26.644876897445393 + "value": 11.821404398294506 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3600.0 + "value": 488.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838776.0 + "value": 1866212.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3868480767941698 + "value": 1.1540201302036333 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 34631.0 + "value": 88897.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099304.0 + "value": 2099167.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.796852995815277 + "value": 0.47675879314446995 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.27156581294048227 + "value": 0.02234292557845685 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -4326,13 +4326,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.51345226086795 + "value": 73.90297616084939 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92344048005799 + "value": 99.97325231065088 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -4344,7 +4344,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4831838208.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -4356,43 +4356,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 362807296.0 + "value": 4706533376.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 69206016.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 127926272.0 + "value": 1121452032.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 173801472.0 + "value": 852869120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.623141924505845 + "value": 42.99366709684244 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -4404,13 +4404,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.76548496030877 + "value": 30.513702766878087 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3971735214956826 + "value": 0.36875690599620736 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -4422,7 +4422,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.063196260482734 + "value": 96.9481274731761 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -4441,47 +4441,47 @@ "time" ], "times": { - "compilation": 15674.894, - "data": 66963.522, - "framework": 200368.312, - "kernel_overhead": 28805.72, - "profiling_overhead": 54937.765, - "profiling_runs": 49661.305, + "compilation_time": 23384.551, + "data": 78405.678, + "framework": 3023664.119, + "kernel_overhead": 1420981.55, + "profiling_overhead": 64497.095, + "profiling_runs": 1459779.796, "runtimes": [ - 1828.48 + 5331.424 ], - "search_algorithm": 21.473, - "validation": 12.864 + "search_algorithm": 60.413, + "validation": 33.783 }, - "timestamp": "2026-03-02 14:27:11 UTC" + "timestamp": "2026-03-05 08:58:13 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 64, - "z": 32 + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -4489,61 +4489,61 @@ { "name": "time", "unit": "", - "value": 1772.32 + "value": 3221.088 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.19851269420235 + "value": 19.75611216832672 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1152.0 + "value": 5164.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836336.0 + "value": 1868512.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4170265286554993 + "value": 1.8967892115372322 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 31499.0 + "value": 59727.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099111.0 + "value": 2100313.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.79717284051924 + "value": 1.5876613691223351 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2715748156792023 + "value": 0.07440115786801932 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -4573,13 +4573,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.25363505043956 + "value": 98.74046242952453 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9240947506144 + "value": 99.96540695065934 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -4591,7 +4591,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4831838208.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -4603,43 +4603,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 362807296.0 + "value": 2295332864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 69206016.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 127926272.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 173801472.0 + "value": 471597056.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.62431212870764 + "value": 42.032733759484216 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -4651,13 +4651,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.76795283111407 + "value": 50.80876673964882 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.397263897623806 + "value": 1.0295721775856572 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -4669,7 +4669,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.064856001683594 + "value": 89.26329991335997 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -4688,47 +4688,47 @@ "time" ], "times": { - "compilation": 15406.292, - "data": 69711.22, - "framework": 205942.089, - "kernel_overhead": 28741.815, - "profiling_overhead": 58124.029, - "profiling_runs": 49365.025, + "compilation_time": 23518.816, + "data": 78909.886, + "framework": 1650061.5929999999, + "kernel_overhead": 738690.668, + "profiling_overhead": 65527.093, + "profiling_runs": 766933.946, "runtimes": [ - 1772.32 + 3221.088 ], - "search_algorithm": 21.53, - "validation": 15.203 + "search_algorithm": 49.535, + "validation": 26.401 }, - "timestamp": "2026-03-02 14:27:11 UTC" + "timestamp": "2026-03-05 08:58:14 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 32, - "z": 32 + "x": 16, + "y": 64, + "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 8, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -4736,61 +4736,61 @@ { "name": "time", "unit": "", - "value": 1858.944 + "value": 6089.536 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.06206085143158 + "value": 10.410555028801188 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4452.0 + "value": 184.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839336.0 + "value": 1869012.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.436472247431214 + "value": 1.0215993051422665 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 35720.0 + "value": 101019.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100275.0 + "value": 2099900.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.875434244928064 + "value": 0.41934615503483813 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2752114506597583 + "value": 0.019651603049772703 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -4820,13 +4820,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.37759667830144 + "value": 82.00647950129411 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89319297635204 + "value": 99.96643101028718 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -4838,7 +4838,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4831838208.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -4850,43 +4850,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 362807296.0 + "value": 6215958528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 69206016.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 127926272.0 + "value": 1355284480.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 173801472.0 + "value": 936706048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.20920088570921 + "value": 42.84847894496199 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -4898,13 +4898,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.0392823837715 + "value": 26.83999861366321 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.4438213763588204 + "value": 0.32436033480867404 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -4916,7 +4916,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.8880090861337 + "value": 93.65866552637469 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -4935,19 +4935,19 @@ "time" ], "times": { - "compilation": 14835.442, - "data": 67834.653, - "framework": 202116.83899999998, - "kernel_overhead": 28623.593, - "profiling_overhead": 55977.339, - "profiling_runs": 49681.254, + "compilation_time": 23092.561, + "data": 78655.257, + "framework": 3501133.285, + "kernel_overhead": 1657649.455, + "profiling_overhead": 64852.55, + "profiling_runs": 1699976.023, "runtimes": [ - 1858.944 + 6089.536 ], - "search_algorithm": 20.885, - "validation": 18.401 + "search_algorithm": 43.867, + "validation": 25.126 }, - "timestamp": "2026-03-02 14:27:11 UTC" + "timestamp": "2026-03-05 08:58:16 UTC" }, { "compilation_data": { @@ -4955,7 +4955,7 @@ "global_size": { "x": 16, "y": 64, - "z": 16 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -4965,17 +4965,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -4983,61 +4983,61 @@ { "name": "time", "unit": "", - "value": 1709.248 + "value": 5761.952 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.75953236353669 + "value": 10.98030276336702 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8312.0 + "value": 8104.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871908.0 + "value": 1866636.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5429795173434733 + "value": 1.0758850751540154 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 38501.0 + "value": 103024.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100652.0 + "value": 2102625.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.9954603282463763 + "value": 0.4409082544539691 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.14030932222307607 + "value": 0.02066399010146443 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -5067,13 +5067,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.37818491636557 + "value": 82.02083588741642 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91166902723094 + "value": 99.97706337878503 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -5085,7 +5085,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4563402752.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -5097,43 +5097,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 209715200.0 + "value": 5813305344.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 101187584.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 83886080.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 160432128.0 + "value": 986185728.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 163381248.0 + "value": 897892352.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.53194391455207 + "value": 43.94881795781012 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -5145,13 +5145,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.86917948310973 + "value": 28.219707132532395 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.942661595971218 + "value": 0.34103405836434414 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -5163,7 +5163,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.35130026954217 + "value": 94.50478361515717 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -5182,19 +5182,19 @@ "time" ], "times": { - "compilation": 58583.683, - "data": 68882.676, - "framework": 211853.225, - "kernel_overhead": 32500.187, - "profiling_overhead": 57705.988, - "profiling_runs": 52764.374, + "compilation_time": 23865.401, + "data": 76549.517, + "framework": 3401054.5, + "kernel_overhead": 1610031.706, + "profiling_overhead": 63404.684, + "profiling_runs": 1651068.593, "runtimes": [ - 1709.248 + 5761.952 ], - "search_algorithm": 31.204, - "validation": 14.814 + "search_algorithm": 41.243, + "validation": 24.726 }, - "timestamp": "2026-03-02 14:27:12 UTC" + "timestamp": "2026-03-05 08:58:17 UTC" }, { "compilation_data": { @@ -5202,7 +5202,7 @@ "global_size": { "x": 16, "y": 32, - "z": 16 + "z": 256 }, "local_memory_size": 0, "local_size": { @@ -5212,7 +5212,7 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 44 }, "configuration": { "INNER_UNROLL_FACTOR": "0", @@ -5222,7 +5222,7 @@ "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "1" }, "correctness": 1, "invalidity": "correct", @@ -5230,61 +5230,61 @@ { "name": "time", "unit": "", - "value": 1739.392 + "value": 3625.856 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.43832726671079 + "value": 17.483302040230573 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 216.0 + "value": 468.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869852.0 + "value": 1870788.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.525510714045206 + "value": 1.6840275896817833 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 28327.0 + "value": 63201.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098984.0 + "value": 2100022.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.9954521134411047 + "value": 22.51081365876383 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1402962182017217 + "value": 1.0548462876259923 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -5314,13 +5314,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.31904876787739 + "value": 70.60733095398739 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90870912636719 + "value": 99.96605907945067 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -5332,7 +5332,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4563402752.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -5344,43 +5344,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 209715200.0 + "value": 587202560.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 117440512.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 83886080.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 160432128.0 + "value": 234881024.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 50331648.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 163381248.0 + "value": 341311488.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.52910689286324 + "value": 40.0111206897065 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -5392,13 +5392,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.86306585601984 + "value": 45.022055838910966 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9425377114379023 + "value": 11.958983582210724 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -5410,7 +5410,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.34764631512371 + "value": 57.24553505901626 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -5429,47 +5429,47 @@ "time" ], "times": { - "compilation": 14813.837, - "data": 69903.773, - "framework": 213348.207, - "kernel_overhead": 32181.653, - "profiling_overhead": 58491.338, - "profiling_runs": 52771.443, + "compilation_time": 23988.888, + "data": 77465.072, + "framework": 283929.02999999997, + "kernel_overhead": 55881.283, + "profiling_overhead": 64463.299, + "profiling_runs": 86119.376, "runtimes": [ - 1739.392 + 3625.856 ], - "search_algorithm": 27.055, - "validation": 16.977 + "search_algorithm": 45.473, + "validation": 27.765 }, - "timestamp": "2026-03-02 14:27:12 UTC" + "timestamp": "2026-03-05 08:58:18 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 16 + "x": 16, + "y": 32, + "z": 128 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 31 }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -5477,61 +5477,61 @@ { "name": "time", "unit": "", - "value": 1746.08 + "value": 2123.552 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.864553098030107 + "value": 30.661987662066082 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3584.0 + "value": 5648.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839596.0 + "value": 1871160.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5281195577528206 + "value": 2.952537824835645 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 35076.0 + "value": 41159.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103944.0 + "value": 2100134.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.9954081735550537 + "value": 19.91381513375865 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.14030571757217217 + "value": 0.9329443407934526 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -5561,13 +5561,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.4349709774984 + "value": 94.72445583414333 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90951398763967 + "value": 99.9347416328595 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -5579,7 +5579,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4563402752.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -5591,43 +5591,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 209715200.0 + "value": 704643072.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 142606336.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 83886080.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 160432128.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 163381248.0 + "value": 256901120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.53181813066613 + "value": 50.48829933666026 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -5639,13 +5639,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.86878436934366 + "value": 79.66323734227547 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9426535895155088 + "value": 10.735865969955091 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -5657,7 +5657,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.35106263850053 + "value": 76.24121721979165 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -5676,47 +5676,47 @@ "time" ], "times": { - "compilation": 14803.571, - "data": 69112.638, - "framework": 211499.565, - "kernel_overhead": 31866.805, - "profiling_overhead": 58046.57, - "profiling_runs": 52473.552, + "compilation_time": 24748.861, + "data": 76539.563, + "framework": 288926.78500000003, + "kernel_overhead": 63276.932, + "profiling_overhead": 63262.877, + "profiling_runs": 85847.413, "runtimes": [ - 1746.08 + 2123.552 ], - "search_algorithm": 24.323, - "validation": 16.125 + "search_algorithm": 39.52, + "validation": 24.318 }, - "timestamp": "2026-03-02 14:27:12 UTC" + "timestamp": "2026-03-05 08:58:18 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 64, - "z": 16 + "x": 16, + "y": 32, + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 4, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 39 }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -5724,61 +5724,61 @@ { "name": "time", "unit": "", - "value": 1688.064 + "value": 1842.464 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.35324899622107 + "value": 35.07673051155668 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5352.0 + "value": 3816.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838580.0 + "value": 1872660.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5534382131647413 + "value": 3.3546267506444494 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 34400.0 + "value": 37298.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099880.0 + "value": 2103088.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.9954200247897678 + "value": 11.29249043505999 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.14028764904706462 + "value": 0.5290118643438448 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -5808,13 +5808,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.32169793292385 + "value": 95.85549815869105 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89927778061926 + "value": 99.90820026508712 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -5826,7 +5826,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4563402752.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -5838,43 +5838,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 209715200.0 + "value": 390070272.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 71303168.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 83886080.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 160432128.0 + "value": 88080384.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 163381248.0 + "value": 197394432.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.53140947233898 + "value": 45.84379072782065 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -5886,13 +5886,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.86626037454936 + "value": 90.36764877670687 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9426024441131828 + "value": 6.265725647603698 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -5904,7 +5904,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.34959067853044 + "value": 66.45337668791642 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -5923,31 +5923,31 @@ "time" ], "times": { - "compilation": 14630.477, - "data": 69062.602, - "framework": 212199.229, - "kernel_overhead": 32559.446, - "profiling_overhead": 57695.299, - "profiling_runs": 52881.882, + "compilation_time": 23609.302, + "data": 77391.488, + "framework": 238691.445, + "kernel_overhead": 37700.002, + "profiling_overhead": 64229.259, + "profiling_runs": 59370.696, "runtimes": [ - 1688.064 + 1842.464 ], - "search_algorithm": 28.889, - "validation": 17.432 + "search_algorithm": 37.404, + "validation": 28.575 }, - "timestamp": "2026-03-02 14:27:12 UTC" + "timestamp": "2026-03-05 08:58:18 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 32, - "z": 16 + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 8, "z": 1 }, @@ -5960,10 +5960,10 @@ "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -5971,61 +5971,61 @@ { "name": "time", "unit": "", - "value": 1732.192 + "value": 1809.856 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.028384471249563 + "value": 36.10717142505538 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1024.0 + "value": 4632.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837064.0 + "value": 1873172.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5403768234896105 + "value": 3.4378425441578444 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 29461.0 + "value": 37245.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099906.0 + "value": 2102981.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.0033945207103057 + "value": 5.79806750344199 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.14069084149335634 + "value": 0.271544928885194 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -6055,13 +6055,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.07198374280273 + "value": 98.31134515014756 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9085561052818 + "value": 99.89872967169758 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -6073,7 +6073,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4563402752.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -6085,43 +6085,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 209715200.0 + "value": 362807296.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 69206016.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 83886080.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 160432128.0 + "value": 127926272.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 163381248.0 + "value": 173801472.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.63881496995679 + "value": 42.63065258675496 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -6133,13 +6133,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.1328554198977 + "value": 92.78129564885306 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9480046386356222 + "value": 3.397752526203115 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -6151,7 +6151,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.51184107232393 + "value": 60.073450544712045 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -6170,37 +6170,37 @@ "time" ], "times": { - "compilation": 14810.649, - "data": 66773.137, - "framework": 208322.876, - "kernel_overhead": 32740.908, - "profiling_overhead": 55705.094, - "profiling_runs": 53103.737, + "compilation_time": 23984.829, + "data": 77434.289, + "framework": 227873.301, + "kernel_overhead": 32289.546, + "profiling_overhead": 64669.631, + "profiling_runs": 53479.835, "runtimes": [ - 1732.192 + 1809.856 ], - "search_algorithm": 23.927, - "validation": 17.145 + "search_algorithm": 29.336, + "validation": 25.526 }, - "timestamp": "2026-03-02 14:27:12 UTC" + "timestamp": "2026-03-05 08:58:18 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 64, - "z": 8 + "y": 32, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 40 }, "configuration": { "INNER_UNROLL_FACTOR": "0", @@ -6208,9 +6208,9 @@ "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -6218,61 +6218,61 @@ { "name": "time", "unit": "", - "value": 1723.872 + "value": 1759.776 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.588189045591278 + "value": 36.88605679702049 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4788.0 + "value": 4720.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868548.0 + "value": 1871476.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.527843606595351 + "value": 3.5413931940320693 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 34052.0 + "value": 34133.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100264.0 + "value": 2100132.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4943314148943247 + "value": 2.995810782991468 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06999991113292532 + "value": 0.1403372204733366 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -6302,13 +6302,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.74097974848142 + "value": 98.35487056718864 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9287721139449 + "value": 99.91768621862913 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -6320,7 +6320,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4429185024.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -6332,43 +6332,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 138412032.0 + "value": 209715200.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 67108864.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 82837504.0 + "value": 160432128.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 150192128.0 + "value": 163381248.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.93338271560582 + "value": 39.53732126041529 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -6380,13 +6380,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.64133530148415 + "value": 95.88246699405228 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1558218011287757 + "value": 1.942930849732993 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -6398,7 +6398,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 53.51339158662879 + "value": 58.35940850274993 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -6417,19 +6417,19 @@ "time" ], "times": { - "compilation": 69379.553, - "data": 66020.175, - "framework": 184920.453, - "kernel_overhead": 22170.916, - "profiling_overhead": 54140.152, - "profiling_runs": 42589.21, + "compilation_time": 23394.156, + "data": 78146.695, + "framework": 234223.10600000003, + "kernel_overhead": 34944.194, + "profiling_overhead": 64988.476, + "profiling_runs": 56143.741, "runtimes": [ - 1723.872 + 1759.776 ], - "search_algorithm": 22.108, - "validation": 13.947 + "search_algorithm": 36.021, + "validation": 27.244 }, - "timestamp": "2026-03-02 14:27:12 UTC" + "timestamp": "2026-03-05 08:58:18 UTC" }, { "compilation_data": { @@ -6465,49 +6465,49 @@ { "name": "time", "unit": "", - "value": 1750.208 + "value": 1766.368 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.220868889489537 + "value": 36.706781480737355 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1792.0 + "value": 3868.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869220.0 + "value": 1866968.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.528802546053323 + "value": 3.5345471409390568 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 31421.0 + "value": 32325.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100017.0 + "value": 2101961.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4942586932797675 + "value": 1.494729454191167 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -6519,7 +6519,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06999486316995389 + "value": 0.07002956765974384 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -6549,13 +6549,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.71603627770547 + "value": 81.71541045816831 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91596080823105 + "value": 99.94107533056885 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -6615,7 +6615,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.9340170330616 + "value": 37.943656862678395 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -6627,13 +6627,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.6467005621498 + "value": 95.67007632098692 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1558866400943395 + "value": 1.1561691352267707 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -6645,7 +6645,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 53.51639356578415 + "value": 53.529472807435695 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -6664,47 +6664,47 @@ "time" ], "times": { - "compilation": 15378.128, - "data": 69877.93, - "framework": 193548.707, - "kernel_overhead": 22166.081, - "profiling_overhead": 58882.144, - "profiling_runs": 42622.552, + "compilation_time": 26996.767, + "data": 77405.279, + "framework": 213877.56900000002, + "kernel_overhead": 25416.561, + "profiling_overhead": 64677.698, + "profiling_runs": 46378.031, "runtimes": [ - 1750.208 + 1766.368 ], - "search_algorithm": 25.207, - "validation": 16.899 + "search_algorithm": 35.299, + "validation": 29.349 }, - "timestamp": "2026-03-02 14:27:13 UTC" + "timestamp": "2026-03-05 08:58:19 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 8 + "x": 16, + "y": 32, + "z": 128 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 19 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -6712,61 +6712,61 @@ { "name": "time", "unit": "", - "value": 1723.392 + "value": 5089.888 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.79847864699977 + "value": 12.488204138540798 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6076.0 + "value": 8484.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836932.0 + "value": 1871756.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5455738676476742 + "value": 1.2149121270152066 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 37528.0 + "value": 94975.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103854.0 + "value": 2101539.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4941723309173005 + "value": 7.955330623115381 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06999297336165862 + "value": 0.3728045907633021 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -6796,13 +6796,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.74898306986294 + "value": 97.29965499621049 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91735469167092 + "value": 99.96803131613173 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -6814,7 +6814,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4429185024.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -6826,43 +6826,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 138412032.0 + "value": 8212447232.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 82837504.0 + "value": 3321888768.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 150192128.0 + "value": 715390976.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.93148206567682 + "value": 36.2670648971134 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -6874,13 +6874,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.64278390343607 + "value": 31.8228317522826 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1558393074267785 + "value": 4.288623810366209 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -6892,7 +6892,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 53.51420211068998 + "value": 84.80955227851396 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -6911,47 +6911,47 @@ "time" ], "times": { - "compilation": 14703.937, - "data": 66049.147, - "framework": 185282.677, - "kernel_overhead": 22134.187, - "profiling_overhead": 54310.174, - "profiling_runs": 42789.169, + "compilation_time": 24956.992, + "data": 78760.572, + "framework": 2094173.5379999997, + "kernel_overhead": 956100.512, + "profiling_overhead": 65373.641, + "profiling_runs": 993938.813, "runtimes": [ - 1723.392 + 5089.888 ], - "search_algorithm": 22.319, - "validation": 17.682 + "search_algorithm": 43.43, + "validation": 28.609 }, - "timestamp": "2026-03-02 14:27:13 UTC" + "timestamp": "2026-03-05 08:58:20 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 64, - "z": 8 + "x": 16, + "y": 32, + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 4, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -6959,61 +6959,61 @@ { "name": "time", "unit": "", - "value": 1744.864 + "value": 5636.992 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.064318022303674 + "value": 11.26038379343097 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3080.0 + "value": 10284.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839448.0 + "value": 1871400.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.538117200073247 + "value": 1.1065764689527726 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 32620.0 + "value": 104862.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103065.0 + "value": 2100778.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4941442482665122 + "value": 3.624865238181779 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06995562872962902 + "value": 0.1698796989612288 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -7043,13 +7043,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.71308209609535 + "value": 98.66345354418881 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.87602381554213 + "value": 99.97334613030138 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -7061,7 +7061,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4429185024.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -7073,43 +7073,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 138412032.0 + "value": 10049552384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 82837504.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 150192128.0 + "value": 786956288.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.92614995241385 + "value": 33.82912344661895 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -7121,13 +7121,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.631311810383 + "value": 29.00053171998626 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.155700667630361 + "value": 2.010779054803735 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -7139,7 +7139,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 53.50778322699359 + "value": 85.01953828842187 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -7158,47 +7158,47 @@ "time" ], "times": { - "compilation": 15673.229, - "data": 68222.392, - "framework": 189874.055, - "kernel_overhead": 22318.444, - "profiling_overhead": 56651.226, - "profiling_runs": 42681.993, + "compilation_time": 23946.692, + "data": 78781.421, + "framework": 2674624.573, + "kernel_overhead": 1245289.415, + "profiling_overhead": 64785.487, + "profiling_runs": 1285768.25, "runtimes": [ - 1744.864 + 5636.992 ], - "search_algorithm": 22.988, - "validation": 17.411 + "search_algorithm": 43.258, + "validation": 27.305 }, - "timestamp": "2026-03-02 14:27:13 UTC" + "timestamp": "2026-03-05 08:58:21 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 32, - "z": 8 + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 48 + "private_memory_size": 32, + "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -7206,61 +7206,61 @@ { "name": "time", "unit": "", - "value": 1688.8 + "value": 7968.48 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.13852015732547 + "value": 8.213741561021145 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 256.0 + "value": 7112.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1831224.0 + "value": 1930164.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5396800085613593 + "value": 49.29505360801224 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 28172.0 + "value": 1189163.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098988.0 + "value": 138418599.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.497302452079513 + "value": 1.2720713942208848 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0701041349915365 + "value": 0.05983444294622985 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -7278,25 +7278,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.50782647476196 + "value": 97.48216949304346 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.86654762522814 + "value": 100.29803377838853 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -7308,7 +7308,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4429185024.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -7320,43 +7320,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 138412032.0 + "value": 3982491648.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 82837504.0 + "value": 155189248.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 150192128.0 + "value": 395247616.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.015611923075845 + "value": 10.648445960233728 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -7368,13 +7368,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.84341762532802 + "value": 20.362801831296213 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.158263958118588 + "value": 11.026536733841553 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -7386,7 +7386,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 53.626461008908436 + "value": 29.982748301906142 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -7405,37 +7405,37 @@ "time" ], "times": { - "compilation": 14510.651, - "data": 68954.83, - "framework": 190739.36500000002, - "kernel_overhead": 22152.1, - "profiling_overhead": 57275.768, - "profiling_runs": 42356.667, + "compilation_time": 24034.704, + "data": 77247.88, + "framework": 924329.567, + "kernel_overhead": 365573.167, + "profiling_overhead": 64042.0, + "profiling_runs": 417466.52, "runtimes": [ - 1688.8 + 7968.48 ], - "search_algorithm": 22.55, - "validation": 14.589 + "search_algorithm": 42.494, + "validation": 27.571 }, - "timestamp": "2026-03-02 14:27:13 UTC" + "timestamp": "2026-03-05 08:58:22 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 64, - "z": 128 + "y": 32, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 19 + "private_memory_size": 64, + "registers": 32 }, "configuration": { "INNER_UNROLL_FACTOR": "1", @@ -7443,9 +7443,9 @@ "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -7453,61 +7453,61 @@ { "name": "time", "unit": "", - "value": 5040.704 + "value": 8051.872 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.572542672160095 + "value": 8.513612388747786 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4272.0 + "value": 20452.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869312.0 + "value": 2021856.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.2118451600012847 + "value": 54.428578080313386 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 90559.0 + "value": 18354782.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100420.0 + "value": 138423365.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.954638179949114 + "value": 0.6297278110693573 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3727980307963393 + "value": 0.029395657997212197 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -7525,25 +7525,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.57303958386943 + "value": 89.93189658289434 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96974007174572 + "value": 99.22107128035603 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -7555,7 +7555,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6979321856.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -7567,7 +7567,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8212447232.0 + "value": 3635412992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -7579,31 +7579,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 2252341248.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 3321888768.0 + "value": 77594624.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 715390976.0 + "value": 367034368.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.265524109943925 + "value": 9.140009271759984 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -7615,13 +7615,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.821727859973986 + "value": 20.2249739903818 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.288475043629306 + "value": 10.621073987624818 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -7633,7 +7633,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.80660323363401 + "value": 27.654120406567255 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -7652,19 +7652,19 @@ "time" ], "times": { - "compilation": 48903.768, - "data": 65025.973, - "framework": 2047210.12, - "kernel_overhead": 945847.631, - "profiling_overhead": 53378.434, - "profiling_runs": 982958.082, + "compilation_time": 24151.263, + "data": 77657.267, + "framework": 896889.344, + "kernel_overhead": 351232.27, + "profiling_overhead": 64127.984, + "profiling_runs": 403871.823, "runtimes": [ - 5040.704 + 8051.872 ], - "search_algorithm": 22.451, - "validation": 16.967 + "search_algorithm": 45.961, + "validation": 29.653 }, - "timestamp": "2026-03-02 14:27:14 UTC" + "timestamp": "2026-03-05 08:58:22 UTC" }, { "compilation_data": { @@ -7672,7 +7672,7 @@ "global_size": { "x": 16, "y": 32, - "z": 128 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -7681,8 +7681,8 @@ "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 19 + "private_memory_size": 128, + "registers": 32 }, "configuration": { "INNER_UNROLL_FACTOR": "1", @@ -7692,7 +7692,7 @@ "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -7700,61 +7700,61 @@ { "name": "time", "unit": "", - "value": 5220.64 + "value": 9519.712 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.308095767389576 + "value": 8.108039600926485 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12580.0 + "value": 8412.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1873964.0 + "value": 2294676.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1941739684220447 + "value": 77.22209660510792 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 103841.0 + "value": 121510579.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101615.0 + "value": 138415654.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.954787536510024 + "value": 0.2719596153914386 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3727704927643376 + "value": 0.01256507703717447 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -7772,25 +7772,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.19801961609451 + "value": 95.10563812804043 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96279201284311 + "value": 98.65757846700416 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -7802,7 +7802,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6979321856.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -7814,7 +7814,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8212447232.0 + "value": 3461873664.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -7826,31 +7826,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 2218786816.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 3321888768.0 + "value": 38797312.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 715390976.0 + "value": 352927744.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.26506510243737 + "value": 7.238217831223862 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -7862,13 +7862,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.821588888596924 + "value": 17.388951544653178 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.2884563150648205 + "value": 8.98952756245193 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -7880,7 +7880,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.80625586515987 + "value": 22.862536893725075 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -7899,47 +7899,47 @@ "time" ], "times": { - "compilation": 14183.07, - "data": 66699.572, - "framework": 2057459.585, - "kernel_overhead": 948511.36, - "profiling_overhead": 56129.21, - "profiling_runs": 986119.443, + "compilation_time": 23936.45, + "data": 78778.432, + "framework": 885390.257, + "kernel_overhead": 340305.34, + "profiling_overhead": 65606.78, + "profiling_runs": 400699.705, "runtimes": [ - 5220.64 + 9519.712 ], - "search_algorithm": 26.615, - "validation": 16.853 + "search_algorithm": 42.989, + "validation": 29.298 }, - "timestamp": "2026-03-02 14:27:15 UTC" + "timestamp": "2026-03-05 08:58:23 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 128 + "x": 16, + "y": 32, + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 19 + "registers": 23 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -7947,61 +7947,61 @@ { "name": "time", "unit": "", - "value": 5302.272 + "value": 3413.152 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.267846384576929 + "value": 18.75272059200082 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10104.0 + "value": 4804.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840808.0 + "value": 1870060.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.2093389561312229 + "value": 1.814011269405932 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 98475.0 + "value": 62776.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108945.0 + "value": 2100454.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.955036589733708 + "value": 6.060256575324432 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.37278035962019357 + "value": 0.28394328782780903 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -8031,13 +8031,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.85859106171799 + "value": 97.911285400053 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96580386566983 + "value": 99.94751144233335 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -8049,7 +8049,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6979321856.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -8061,43 +8061,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8212447232.0 + "value": 4945084416.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 3321888768.0 + "value": 1665138688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 715390976.0 + "value": 484966400.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.264876633102254 + "value": 41.38183186501486 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -8109,13 +8109,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.821472401037948 + "value": 48.48510358750233 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.288440616546129 + "value": 3.3617601120240876 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -8127,7 +8127,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.80592348883233 + "value": 87.59582903647059 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -8146,47 +8146,47 @@ "time" ], "times": { - "compilation": 14163.561, - "data": 63883.455, - "framework": 2058825.6609999998, - "kernel_overhead": 951616.288, - "profiling_overhead": 54241.445, - "profiling_runs": 989084.473, + "compilation_time": 23942.146, + "data": 77949.289, + "framework": 1374214.727, + "kernel_overhead": 601008.635, + "profiling_overhead": 64979.452, + "profiling_runs": 630277.351, "runtimes": [ - 5302.272 + 3413.152 ], - "search_algorithm": 31.338, - "validation": 19.367 + "search_algorithm": 45.135, + "validation": 30.304 }, - "timestamp": "2026-03-02 14:27:16 UTC" + "timestamp": "2026-03-05 08:58:24 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 64, - "z": 128 + "x": 16, + "y": 32, + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 4, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 19 + "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -8194,61 +8194,61 @@ { "name": "time", "unit": "", - "value": 5223.776 + "value": 4018.24 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.298409619735693 + "value": 15.786233077105779 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8268.0 + "value": 1432.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1843392.0 + "value": 1870204.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.2101768209131532 + "value": 1.522631298547231 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 95355.0 + "value": 68994.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101162.0 + "value": 2098838.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.954313311081076 + "value": 2.539960480740308 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.37278002035000307 + "value": 0.11902826902016136 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -8278,13 +8278,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.09506735108882 + "value": 98.8134396279408 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96908466832195 + "value": 99.9638594993722 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -8296,7 +8296,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6979321856.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -8308,43 +8308,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8212447232.0 + "value": 5463080960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 3321888768.0 + "value": 1369440256.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 715390976.0 + "value": 596246528.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.26412953981809 + "value": 40.97162586373449 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -8356,13 +8356,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.820399118462344 + "value": 40.64300441081266 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.288295974949027 + "value": 1.4883912748100339 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -8374,7 +8374,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.8030850633873 + "value": 90.27643673242108 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -8393,47 +8393,47 @@ "time" ], "times": { - "compilation": 13286.831, - "data": 61610.138, - "framework": 2050654.9640000002, - "kernel_overhead": 949914.734, - "profiling_overhead": 51830.53, - "profiling_runs": 987299.562, + "compilation_time": 23519.892, + "data": 77752.174, + "framework": 1951525.855, + "kernel_overhead": 888404.147, + "profiling_overhead": 64583.68, + "profiling_runs": 920785.854, "runtimes": [ - 5223.776 + 4018.24 ], - "search_algorithm": 23.685, - "validation": 16.481 + "search_algorithm": 42.947, + "validation": 28.688 }, - "timestamp": "2026-03-02 14:27:17 UTC" + "timestamp": "2026-03-05 08:58:25 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 32, - "z": 128 + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 19 + "private_memory_size": 64, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -8441,61 +8441,61 @@ { "name": "time", "unit": "", - "value": 5284.096 + "value": 8105.695 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.218901823156388 + "value": 8.49620818685416 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7624.0 + "value": 19088.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838304.0 + "value": 2022612.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.2024433487824826 + "value": 57.13862558415988 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 92251.0 + "value": 25549471.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101137.0 + "value": 138428000.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.958508701782195 + "value": 0.632692679521131 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3729185929075591 + "value": 0.02949538268122726 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -8513,25 +8513,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.87309587291863 + "value": 91.63900562744628 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96016633730657 + "value": 99.49778428179343 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -8543,7 +8543,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6979321856.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -8555,43 +8555,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8212447232.0 + "value": 1958739968.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 1178599424.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 3321888768.0 + "value": 143654912.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 715390976.0 + "value": 266371072.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.28072737012859 + "value": 8.987357173890159 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -8603,13 +8603,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.835067668251565 + "value": 20.237148718833968 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.2902727912292145 + "value": 5.568180323761201 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -8621,7 +8621,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.84221122305415 + "value": 20.081828008711465 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -8640,47 +8640,47 @@ "time" ], "times": { - "compilation": 13485.982, - "data": 61621.14, - "framework": 2062871.89, - "kernel_overhead": 955930.933, - "profiling_overhead": 51710.912, - "profiling_runs": 993608.905, + "compilation_time": 23246.675, + "data": 79161.263, + "framework": 583115.382, + "kernel_overhead": 193264.133, + "profiling_overhead": 64954.777, + "profiling_runs": 245735.209, "runtimes": [ - 5284.096 + 8105.695 ], - "search_algorithm": 38.967, - "validation": 18.585 + "search_algorithm": 44.439, + "validation": 32.032 }, - "timestamp": "2026-03-02 14:27:18 UTC" + "timestamp": "2026-03-05 08:58:25 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 64, - "z": 64 + "y": 32, + "z": 8 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 22 + "private_memory_size": 128, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -8688,61 +8688,61 @@ { "name": "time", "unit": "", - "value": 5631.296 + "value": 9374.08 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.743178697412345 + "value": 8.313393460609035 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12728.0 + "value": 22144.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1878104.0 + "value": 2276492.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.11394562871978 + "value": 81.74701818545796 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 112034.0 + "value": 132980725.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108815.0 + "value": 138425674.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.624653242658123 + "value": 0.27354139990324094 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1698607247114455 + "value": 0.012747134711134554 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -8760,25 +8760,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.79935624600607 + "value": 98.78240329563454 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96696447010724 + "value": 99.0547981587486 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -8790,7 +8790,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -8802,43 +8802,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10049552384.0 + "value": 1817706496.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 1145044992.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1392508928.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 786956288.0 + "value": 252264448.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.827513401376734 + "value": 7.247046953817164 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -8850,13 +8850,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.999143704873966 + "value": 17.57016141480597 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.01068281547466 + "value": 4.690666871848224 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -8868,7 +8868,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.01546208575631 + "value": 16.512002458387595 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -8887,19 +8887,19 @@ "time" ], "times": { - "compilation": 51196.826, - "data": 60633.808, - "framework": 2642218.2739999997, - "kernel_overhead": 1245498.89, - "profiling_overhead": 50850.34, - "profiling_runs": 1285235.236, + "compilation_time": 23564.857, + "data": 79101.421, + "framework": 567633.031, + "kernel_overhead": 181854.794, + "profiling_overhead": 65432.133, + "profiling_runs": 241244.683, "runtimes": [ - 5631.296 + 9374.08 ], - "search_algorithm": 23.971, - "validation": 16.965 + "search_algorithm": 43.86, + "validation": 26.859 }, - "timestamp": "2026-03-02 14:27:20 UTC" + "timestamp": "2026-03-05 08:58:25 UTC" }, { "compilation_data": { @@ -8907,7 +8907,7 @@ "global_size": { "x": 16, "y": 32, - "z": 64 + "z": 32 }, "local_memory_size": 0, "local_size": { @@ -8917,17 +8917,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 22 + "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -8935,61 +8935,61 @@ { "name": "time", "unit": "", - "value": 5729.184 + "value": 2567.712 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.561942755380596 + "value": 25.207907242540905 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 452.0 + "value": 4852.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870364.0 + "value": 1872520.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1024124613080444 + "value": 2.419401641750456 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 96152.0 + "value": 48727.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099278.0 + "value": 2100432.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.624717112803529 + "value": 4.067263591147413 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1698702054357429 + "value": 0.19058770047052112 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -9019,13 +9019,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.68932169450795 + "value": 98.50990289124503 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97322007115042 + "value": 99.95062588354064 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -9037,7 +9037,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -9049,43 +9049,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10049552384.0 + "value": 2776629248.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1392508928.0 + "value": 1642070016.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 786956288.0 + "value": 378208256.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.82699861229088 + "value": 45.25549293826121 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -9097,13 +9097,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.998947625241282 + "value": 65.08607076631687 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.010669220109503 + "value": 2.383523099352424 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -9115,7 +9115,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.01490755761893 + "value": 91.70290769523841 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -9134,47 +9134,47 @@ "time" ], "times": { - "compilation": 14112.735, - "data": 64938.396, - "framework": 2650753.721, - "kernel_overhead": 1245459.925, - "profiling_overhead": 55161.518, - "profiling_runs": 1285193.882, + "compilation_time": 23574.048, + "data": 77517.917, + "framework": 1103053.559, + "kernel_overhead": 468252.871, + "profiling_overhead": 64095.719, + "profiling_runs": 493187.052, "runtimes": [ - 5729.184 + 2567.712 ], - "search_algorithm": 21.763, - "validation": 17.532 + "search_algorithm": 44.915, + "validation": 27.987 }, - "timestamp": "2026-03-02 14:27:21 UTC" + "timestamp": "2026-03-05 08:58:26 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 64 + "x": 16, + "y": 32, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 22 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -9182,61 +9182,61 @@ { "name": "time", "unit": "", - "value": 5733.312 + "value": 3243.936 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.46964515845586 + "value": 19.76997696907661 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12304.0 + "value": 7396.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838388.0 + "value": 1871492.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1085582892731665 + "value": 1.9056478579268425 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 106189.0 + "value": 59948.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101548.0 + "value": 2100194.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.6245812309074075 + "value": 1.5935895927919896 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.16986547961735202 + "value": 0.0746768167499711 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -9266,13 +9266,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.90376443190702 + "value": 98.60599824542064 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97090632039065 + "value": 99.96883889066275 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -9284,7 +9284,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -9296,43 +9296,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10049552384.0 + "value": 3234856960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1392508928.0 + "value": 1294991360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 786956288.0 + "value": 500957184.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.82766003376139 + "value": 45.37399839717826 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -9344,13 +9344,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.998812009484638 + "value": 50.995264258032535 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.010659817063876 + "value": 1.0333513021036866 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -9362,7 +9362,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.01449107394457 + "value": 95.16866195031263 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -9381,47 +9381,47 @@ "time" ], "times": { - "compilation": 14569.801, - "data": 63214.676, - "framework": 2652139.545, - "kernel_overhead": 1247636.846, - "profiling_overhead": 53559.288, - "profiling_runs": 1287728.735, + "compilation_time": 24698.927, + "data": 77318.331, + "framework": 1845069.205, + "kernel_overhead": 837620.462, + "profiling_overhead": 64158.042, + "profiling_runs": 865972.37, "runtimes": [ - 5733.312 + 3243.936 ], - "search_algorithm": 27.741, - "validation": 16.008 + "search_algorithm": 45.122, + "validation": 33.435 }, - "timestamp": "2026-03-02 14:27:23 UTC" + "timestamp": "2026-03-05 08:58:27 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 64, - "z": 64 + "x": 16, + "y": 32, + "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 4, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 22 + "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -9429,61 +9429,61 @@ { "name": "time", "unit": "", - "value": 5702.176 + "value": 5376.352 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.42452904790356 + "value": 11.751102164837112 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1488.0 + "value": 560.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839432.0 + "value": 1865688.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1055722680696183 + "value": 1.1553378586569238 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 98455.0 + "value": 89059.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103070.0 + "value": 2099231.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.624700573383649 + "value": 0.47674492032141996 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.16986132933285572 + "value": 0.02234357479304373 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -9513,13 +9513,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.65745623607953 + "value": 73.8668059802131 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96732969934732 + "value": 99.97879901503529 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -9531,7 +9531,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -9543,43 +9543,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10049552384.0 + "value": 4706533376.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1392508928.0 + "value": 1121452032.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 786956288.0 + "value": 852869120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.82749552994452 + "value": 42.99254821397175 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -9591,13 +9591,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.999140979352028 + "value": 30.5128964853652 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.010682626498041 + "value": 0.3687471621156195 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -9609,7 +9609,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.0154744051495 + "value": 96.94559621918853 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -9628,47 +9628,47 @@ "time" ], "times": { - "compilation": 14911.855, - "data": 68516.999, - "framework": 2660516.805, - "kernel_overhead": 1247332.125, - "profiling_overhead": 56986.308, - "profiling_runs": 1287681.373, + "compilation_time": 23545.831, + "data": 77761.116, + "framework": 3064901.3959999997, + "kernel_overhead": 1441717.68, + "profiling_overhead": 64615.698, + "profiling_runs": 1480806.902, "runtimes": [ - 5702.176 + 5376.352 ], - "search_algorithm": 25.773, - "validation": 18.631 + "search_algorithm": 43.662, + "validation": 31.737 }, - "timestamp": "2026-03-02 14:27:24 UTC" + "timestamp": "2026-03-05 08:58:29 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 32, - "z": 64 + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 22 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -9676,61 +9676,61 @@ { "name": "time", "unit": "", - "value": 5702.368 + "value": 3239.712 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.507094919065802 + "value": 19.676899672597482 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16200.0 + "value": 6868.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1843176.0 + "value": 1870176.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1123020992377899 + "value": 1.900316093497477 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 110576.0 + "value": 59496.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106498.0 + "value": 2102625.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.6210866215057917 + "value": 1.5876349387156388 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.169701553895452 + "value": 0.07439643260387332 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -9760,13 +9760,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.54019756391658 + "value": 98.68730616615201 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97258160439574 + "value": 99.96314310033935 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -9778,7 +9778,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -9790,43 +9790,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10049552384.0 + "value": 2295332864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1392508928.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 786956288.0 + "value": 471597056.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.79399985466882 + "value": 42.031044473247256 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -9838,13 +9838,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.970341734395117 + "value": 50.80669042848965 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.0086858038496613 + "value": 1.0295301038976175 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -9856,7 +9856,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.93103610722204 + "value": 89.25976211512703 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -9875,47 +9875,47 @@ "time" ], "times": { - "compilation": 18844.701, - "data": 62463.765, - "framework": 2660535.871, - "kernel_overhead": 1252560.574, - "profiling_overhead": 52689.486, - "profiling_runs": 1292822.046, + "compilation_time": 23438.894, + "data": 78474.582, + "framework": 1673852.11, + "kernel_overhead": 750851.56, + "profiling_overhead": 65227.097, + "profiling_runs": 779298.871, "runtimes": [ - 5702.368 + 3239.712 ], - "search_algorithm": 32.311, - "validation": 24.072 + "search_algorithm": 41.534, + "validation": 27.87 }, - "timestamp": "2026-03-02 14:27:25 UTC" + "timestamp": "2026-03-05 08:58:29 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 64, - "z": 32 + "y": 32, + "z": 8 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 32, - "registers": 27 + "private_memory_size": 0, + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -9923,61 +9923,61 @@ { "name": "time", "unit": "", - "value": 8060.864 + "value": 6196.224 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.184620592042736 + "value": 10.408186410879203 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2840.0 + "value": 14536.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1929564.0 + "value": 1872300.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.5773008991906 + "value": 1.0350176111743024 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 858275.0 + "value": 116833.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138421889.0 + "value": 2109434.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.252531365625944 + "value": 0.41934956282347363 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0587562186414145 + "value": 0.01965395350598828 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -9995,25 +9995,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.72727249119923 + "value": 81.97095322328906 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.7135115837206 + "value": 99.97934039681408 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -10025,7 +10025,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -10037,43 +10037,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3982491648.0 + "value": 6215958528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2319450112.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 155189248.0 + "value": 1355284480.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 395247616.0 + "value": 936706048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.51608919320568 + "value": 42.84780345181573 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -10085,13 +10085,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.113077600418634 + "value": 26.839742837875757 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.891310087336068 + "value": 0.32435724376827396 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -10103,7 +10103,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.61504097167848 + "value": 93.65781358633983 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -10122,19 +10122,19 @@ "time" ], "times": { - "compilation": 60794.613, - "data": 63849.84, - "framework": 909841.787, - "kernel_overhead": 369762.414, - "profiling_overhead": 53552.057, - "profiling_runs": 422677.476, + "compilation_time": 24051.955, + "data": 77901.988, + "framework": 3544146.81, + "kernel_overhead": 1679457.55, + "profiling_overhead": 64646.994, + "profiling_runs": 1722140.278, "runtimes": [ - 8060.864 + 6196.224 ], - "search_algorithm": 32.709, - "validation": 25.389 + "search_algorithm": 46.478, + "validation": 29.403 }, - "timestamp": "2026-03-02 14:27:26 UTC" + "timestamp": "2026-03-05 08:58:31 UTC" }, { "compilation_data": { @@ -10142,7 +10142,7 @@ "global_size": { "x": 16, "y": 32, - "z": 32 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -10151,18 +10151,18 @@ "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 32, - "registers": 27 + "private_memory_size": 0, + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -10170,61 +10170,61 @@ { "name": "time", "unit": "", - "value": 8162.719 + "value": 5797.92 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.242340999664487 + "value": 10.901583137056141 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 18208.0 + "value": 2592.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1934612.0 + "value": 1869536.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.24921855338431 + "value": 1.0732266868130578 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 1187902.0 + "value": 98147.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425285.0 + "value": 2099935.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.276159495206637 + "value": 0.440909611544948 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05955755108248684 + "value": 0.02066316031938708 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -10242,25 +10242,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.53313421125242 + "value": 81.98268067041548 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.6917646515126 + "value": 99.97251301932123 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -10272,7 +10272,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -10284,43 +10284,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3982491648.0 + "value": 5813305344.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 101187584.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2319450112.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 155189248.0 + "value": 986185728.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 395247616.0 + "value": 897892352.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.662595036670414 + "value": 43.94888545694018 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -10332,13 +10332,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.39183227142029 + "value": 28.219858343080162 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.042256830568897 + "value": 0.3410358857379072 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -10350,7 +10350,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.025508635859254 + "value": 94.50533300013349 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -10369,19 +10369,19 @@ "time" ], "times": { - "compilation": 24018.314, - "data": 62821.551, - "framework": 915942.1410000001, - "kernel_overhead": 373641.612, - "profiling_overhead": 53095.801, - "profiling_runs": 426383.177, + "compilation_time": 23880.298, + "data": 77182.508, + "framework": 3450298.107, + "kernel_overhead": 1633873.925, + "profiling_overhead": 64196.645, + "profiling_runs": 1675045.029, "runtimes": [ - 8162.719 + 5797.92 ], - "search_algorithm": 34.306, - "validation": 25.689 + "search_algorithm": 45.877, + "validation": 31.952 }, - "timestamp": "2026-03-02 14:27:27 UTC" + "timestamp": "2026-03-05 08:58:33 UTC" }, { "compilation_data": { @@ -10389,7 +10389,7 @@ "global_size": { "x": 8, "y": 128, - "z": 32 + "z": 256 }, "local_memory_size": 0, "local_size": { @@ -10398,18 +10398,18 @@ "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 32, - "registers": 27 + "private_memory_size": 0, + "registers": 44 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "1" }, "correctness": 1, "invalidity": "correct", @@ -10417,61 +10417,61 @@ { "name": "time", "unit": "", - "value": 8385.44 + "value": 3645.664 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.203995753375101 + "value": 17.137741937406144 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 18788.0 + "value": 428.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1912904.0 + "value": 1840160.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.91948830368562 + "value": 1.686708818867508 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 702128.0 + "value": 61519.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425531.0 + "value": 2099167.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2685177925548012 + "value": 22.560828566791557 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05949840059312468 + "value": 1.0572905552840435 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -10489,25 +10489,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.49477160675931 + "value": 60.41902113511516 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.88878759710131 + "value": 99.9533051246498 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -10519,7 +10519,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -10531,43 +10531,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3982491648.0 + "value": 587202560.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 117440512.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2319450112.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 155189248.0 + "value": 234881024.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 50331648.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 395247616.0 + "value": 341311488.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.738578719287267 + "value": 40.10783949093691 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -10579,13 +10579,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.53699706097775 + "value": 45.13213808774214 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.12086413116422 + "value": 11.988224179556505 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -10597,7 +10597,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.239232707174757 + "value": 57.3854259315578 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -10616,47 +10616,47 @@ "time" ], "times": { - "compilation": 25329.644, - "data": 62504.348, - "framework": 911986.6509999998, - "kernel_overhead": 371833.562, - "profiling_overhead": 52616.098, - "profiling_runs": 425032.643, + "compilation_time": 23952.175, + "data": 77899.058, + "framework": 285335.06, + "kernel_overhead": 56155.138, + "profiling_overhead": 64781.319, + "profiling_runs": 86499.545, "runtimes": [ - 8385.44 + 3645.664 ], - "search_algorithm": 35.903, - "validation": 27.142 + "search_algorithm": 46.146, + "validation": 27.811 }, - "timestamp": "2026-03-02 14:27:27 UTC" + "timestamp": "2026-03-05 08:58:33 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, - "z": 32 + "y": 128, + "z": 128 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 32, - "registers": 27 + "private_memory_size": 0, + "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -10664,61 +10664,61 @@ { "name": "time", "unit": "", - "value": 8036.992 + "value": 2123.584 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.2623527480153 + "value": 29.989669958419956 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 23328.0 + "value": 7568.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1917316.0 + "value": 1838836.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.17345212913307 + "value": 2.9603830050088193 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 908566.0 + "value": 43062.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138429922.0 + "value": 2100646.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2772747716529027 + "value": 19.910667443335285 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05970321409833485 + "value": 0.9329922434769602 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -10736,25 +10736,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.63809629812862 + "value": 95.67205231795086 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.85814422132965 + "value": 99.94468710535702 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -10766,7 +10766,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -10778,43 +10778,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3982491648.0 + "value": 704643072.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 142606336.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2319450112.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 155189248.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 395247616.0 + "value": 256901120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.7796138793179 + "value": 50.487746999362045 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -10826,13 +10826,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.614080144245097 + "value": 79.65940003004579 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.162604921859284 + "value": 10.73534883217414 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -10844,7 +10844,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.35275266846908 + "value": 76.23737559343756 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -10863,47 +10863,47 @@ "time" ], "times": { - "compilation": 27418.82, - "data": 63688.572, - "framework": 917240.882, - "kernel_overhead": 373512.651, - "profiling_overhead": 53852.778, - "profiling_runs": 426186.881, + "compilation_time": 23659.473, + "data": 77516.922, + "framework": 291948.339, + "kernel_overhead": 63836.474, + "profiling_overhead": 64010.084, + "profiling_runs": 86584.859, "runtimes": [ - 8036.992 + 2123.584 ], - "search_algorithm": 37.781, - "validation": 29.368 + "search_algorithm": 40.166, + "validation": 30.321 }, - "timestamp": "2026-03-02 14:27:28 UTC" + "timestamp": "2026-03-05 08:58:33 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 32 + "y": 128, + "z": 64 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 32, - "registers": 27 + "private_memory_size": 0, + "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -10911,61 +10911,61 @@ { "name": "time", "unit": "", - "value": 8009.504 + "value": 1835.104 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.194931691947496 + "value": 34.25497972328244 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 800.0 + "value": 448.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1909372.0 + "value": 1837592.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.188753134267664 + "value": 3.3302672811728042 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 698305.0 + "value": 32134.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138417838.0 + "value": 2099167.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2768598513929923 + "value": 11.293169227708892 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05984099622012652 + "value": 0.5290606063509016 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -10983,25 +10983,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.41365317820662 + "value": 97.10898177595263 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98529108203337 + "value": 99.92591670147849 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -11013,7 +11013,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -11025,43 +11025,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3982491648.0 + "value": 390070272.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 71303168.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2319450112.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 155189248.0 + "value": 88080384.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 395247616.0 + "value": 197394432.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.682977412304158 + "value": 45.842067634155015 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -11073,13 +11073,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.42873155516926 + "value": 90.35995178337744 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.062237936856793 + "value": 6.265191969355273 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -11091,7 +11091,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.079840181358215 + "value": 66.4475216726092 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -11110,47 +11110,47 @@ "time" ], "times": { - "compilation": 27499.251, - "data": 61914.431, - "framework": 910273.959, - "kernel_overhead": 371871.359, - "profiling_overhead": 51926.808, - "profiling_runs": 424561.361, + "compilation_time": 23485.077, + "data": 78835.333, + "framework": 241120.76499999998, + "kernel_overhead": 38186.301, + "profiling_overhead": 64516.624, + "profiling_runs": 59582.507, "runtimes": [ - 8009.504 + 1835.104 ], - "search_algorithm": 38.191, - "validation": 28.099 + "search_algorithm": 40.834, + "validation": 32.341 }, - "timestamp": "2026-03-02 14:27:28 UTC" + "timestamp": "2026-03-05 08:58:34 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 16 + "x": 8, + "y": 128, + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 32 + "private_memory_size": 0, + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -11158,61 +11158,61 @@ { "name": "time", "unit": "", - "value": 8049.76 + "value": 1808.864 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.5059229752296135 + "value": 35.03152091856614 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 11880.0 + "value": 420.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2022748.0 + "value": 1840904.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.22798531432576 + "value": 3.4227764602235693 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 17214324.0 + "value": 31472.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138420513.0 + "value": 2099158.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.629185588519264 + "value": 5.798534979269805 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029330501234110438 + "value": 0.27161857749053875 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -11230,25 +11230,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.42199287317905 + "value": 98.58522366411808 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.41614763737418 + "value": 99.91880993463515 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -11260,7 +11260,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -11272,43 +11272,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3635412992.0 + "value": 362807296.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 69206016.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2252341248.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 77594624.0 + "value": 127926272.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 367034368.0 + "value": 173801472.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.195118635589191 + "value": 42.634966048260914 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -11320,13 +11320,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.34519333446138 + "value": 92.78780893282708 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.684206753522078 + "value": 3.397991049786148 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -11338,7 +11338,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.81849368346201 + "value": 60.07747484577879 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -11357,45 +11357,45 @@ "time" ], "times": { - "compilation": 87761.675, - "data": 61438.336, - "framework": 872988.416, - "kernel_overhead": 353419.37, - "profiling_overhead": 51590.114, - "profiling_runs": 406540.596, + "compilation_time": 24919.899, + "data": 77687.52, + "framework": 227850.243, + "kernel_overhead": 32281.823, + "profiling_overhead": 64349.927, + "profiling_runs": 53530.973, "runtimes": [ - 8049.76 + 1808.864 ], - "search_algorithm": 36.389, - "validation": 26.36 + "search_algorithm": 33.736, + "validation": 26.667 }, - "timestamp": "2026-03-02 14:27:29 UTC" + "timestamp": "2026-03-05 08:58:34 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, + "x": 8, + "y": 128, "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 32 + "private_memory_size": 0, + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "16" }, @@ -11405,49 +11405,49 @@ { "name": "time", "unit": "", - "value": 8167.616 + "value": 1750.592 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.480627915386742 + "value": 36.35929272985014 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 19164.0 + "value": 816.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2022352.0 + "value": 1837696.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.508455844985804 + "value": 3.5330971638978657 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 18176753.0 + "value": 30899.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138421402.0 + "value": 2099900.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6316692278440132 + "value": 2.995923296779623 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -11459,7 +11459,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029444467155065903 + "value": 0.14035350625628495 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -11477,25 +11477,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.84592561618177 + "value": 98.42183782524971 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.66422769867958 + "value": 99.92331413600127 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -11507,7 +11507,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -11519,25 +11519,25 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3635412992.0 + "value": 209715200.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 67108864.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2252341248.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 77594624.0 + "value": 160432128.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -11549,13 +11549,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 367034368.0 + "value": 163381248.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.207656559502038 + "value": 39.53892258511228 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -11567,13 +11567,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.37289168868039 + "value": 95.88819296020151 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.698752446863164 + "value": 1.9430468788322082 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -11585,7 +11585,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.856386754387458 + "value": 58.36269039133033 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -11604,19 +11604,19 @@ "time" ], "times": { - "compilation": 26257.373, - "data": 62407.61, - "framework": 882208.0659999999, - "kernel_overhead": 357116.584, - "profiling_overhead": 52374.961, - "profiling_runs": 410308.911, + "compilation_time": 23906.91, + "data": 77753.871, + "framework": 234479.684, + "kernel_overhead": 35611.368, + "profiling_overhead": 64469.598, + "profiling_runs": 56644.847, "runtimes": [ - 8167.616 + 1750.592 ], - "search_algorithm": 49.483, - "validation": 28.136 + "search_algorithm": 44.024, + "validation": 26.473 }, - "timestamp": "2026-03-02 14:27:29 UTC" + "timestamp": "2026-03-05 08:58:34 UTC" }, { "compilation_data": { @@ -11624,7 +11624,7 @@ "global_size": { "x": 8, "y": 128, - "z": 16 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -11633,18 +11633,18 @@ "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 32 + "private_memory_size": 0, + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -11652,61 +11652,61 @@ { "name": "time", "unit": "", - "value": 8186.847 + "value": 1757.568 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.380957784145955 + "value": 35.99321109325566 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4508.0 + "value": 828.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2002000.0 + "value": 1834008.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 53.6866653460216 + "value": 3.5294395110140293 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 15077355.0 + "value": 30642.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138423319.0 + "value": 2099902.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6324981297719346 + "value": 1.4945712653788041 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029545391211106897 + "value": 0.07001765256247058 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -11724,25 +11724,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.82228191852329 + "value": 81.73421665286492 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.57484724842796 + "value": 99.9357975647669 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -11754,7 +11754,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -11766,43 +11766,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3635412992.0 + "value": 138412032.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 33554432.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2252341248.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 77594624.0 + "value": 82837504.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 367034368.0 + "value": 150192128.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.154037602257668 + "value": 37.93871166564994 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -11814,13 +11814,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.255771704201535 + "value": 95.65885027668679 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.63724729876404 + "value": 1.156033468919921 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -11832,7 +11832,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.69622343215775 + "value": 53.52304406153514 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -11851,37 +11851,37 @@ "time" ], "times": { - "compilation": 28404.935, - "data": 61859.761, - "framework": 875972.183, - "kernel_overhead": 354547.332, - "profiling_overhead": 51995.282, - "profiling_runs": 407569.808, + "compilation_time": 24316.543, + "data": 77295.961, + "framework": 213129.02, + "kernel_overhead": 25178.134, + "profiling_overhead": 64323.99, + "profiling_runs": 46330.935, "runtimes": [ - 8186.847 + 1757.568 ], - "search_algorithm": 36.989, - "validation": 24.629 + "search_algorithm": 37.353, + "validation": 29.665 }, - "timestamp": "2026-03-02 14:27:30 UTC" + "timestamp": "2026-03-05 08:58:34 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, - "z": 16 + "y": 128, + "z": 128 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 32 + "private_memory_size": 0, + "registers": 19 }, "configuration": { "INNER_UNROLL_FACTOR": "1", @@ -11889,9 +11889,9 @@ "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -11899,61 +11899,61 @@ { "name": "time", "unit": "", - "value": 8147.968 + "value": 5214.08 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.383817084516698 + "value": 12.07312538645365 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5324.0 + "value": 10592.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1997164.0 + "value": 1838864.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 53.92331861241032 + "value": 1.2102880658254853 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 15772037.0 + "value": 95937.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415261.0 + "value": 2100774.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6337873329365352 + "value": 7.955541533312376 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029581964092194813 + "value": 0.3727806988910017 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -11971,25 +11971,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.22455370436917 + "value": 97.6730509041806 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.58025848902223 + "value": 99.96091792317344 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -12001,7 +12001,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -12013,7 +12013,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3635412992.0 + "value": 8212447232.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -12025,31 +12025,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2252341248.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 77594624.0 + "value": 3321888768.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 367034368.0 + "value": 715390976.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.164616694962866 + "value": 36.26651754736503 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -12061,13 +12061,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.279743321310942 + "value": 31.823056750186478 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.649835909213827 + "value": 4.2886541323493494 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -12079,7 +12079,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.72902255952332 + "value": 84.81008492916699 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -12098,37 +12098,37 @@ "time" ], "times": { - "compilation": 27597.985, - "data": 63292.263, - "framework": 883302.536, - "kernel_overhead": 357095.196, - "profiling_overhead": 53054.477, - "profiling_runs": 409860.6, + "compilation_time": 23441.443, + "data": 77999.087, + "framework": 2120353.878, + "kernel_overhead": 969764.392, + "profiling_overhead": 64696.601, + "profiling_runs": 1007893.798, "runtimes": [ - 8147.968 + 5214.08 ], - "search_algorithm": 39.042, - "validation": 24.502 + "search_algorithm": 43.021, + "validation": 34.157 }, - "timestamp": "2026-03-02 14:27:30 UTC" + "timestamp": "2026-03-05 08:58:35 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 16 + "y": 128, + "z": 64 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 32 + "private_memory_size": 0, + "registers": 22 }, "configuration": { "INNER_UNROLL_FACTOR": "1", @@ -12136,9 +12136,9 @@ "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -12146,61 +12146,61 @@ { "name": "time", "unit": "", - "value": 8075.104 + "value": 5675.264 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.527708701358737 + "value": 11.03346418996553 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 20364.0 + "value": 6548.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2004032.0 + "value": 1837188.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 51.61483628154505 + "value": 1.103670829000328 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 8478368.0 + "value": 99597.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138419092.0 + "value": 2100330.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.64344332703454 + "value": 3.6248589883838256 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029748484408167606 + "value": 0.16988083214925623 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -12218,25 +12218,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.09917633866216 + "value": 98.79257061229609 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.43864767585366 + "value": 99.97341421313484 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -12248,7 +12248,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -12260,7 +12260,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3635412992.0 + "value": 10049552384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -12272,31 +12272,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2252341248.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 77594624.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 367034368.0 + "value": 786956288.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.323495025060293 + "value": 33.82993787490858 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -12308,13 +12308,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.63041210824209 + "value": 29.000705419205207 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.833988389850767 + "value": 2.0107910984019233 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -12326,7 +12326,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.20849745906205 + "value": 85.01998463753347 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -12345,47 +12345,47 @@ "time" ], "times": { - "compilation": 31167.21, - "data": 61774.972, - "framework": 878569.583, - "kernel_overhead": 356038.105, - "profiling_overhead": 51764.166, - "profiling_runs": 408992.34, + "compilation_time": 24813.785, + "data": 77242.395, + "framework": 2709245.0590000004, + "kernel_overhead": 1263717.148, + "profiling_overhead": 64103.557, + "profiling_runs": 1304181.959, "runtimes": [ - 8075.104 + 5675.264 ], - "search_algorithm": 53.32, - "validation": 29.118 + "search_algorithm": 47.307, + "validation": 26.366 }, - "timestamp": "2026-03-02 14:27:31 UTC" + "timestamp": "2026-03-05 08:58:37 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 8 + "x": 8, + "y": 128, + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, - "registers": 32 + "private_memory_size": 32, + "registers": 27 }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -12393,61 +12393,61 @@ { "name": "time", "unit": "", - "value": 9279.456 + "value": 8113.408 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.36942070979492 + "value": 8.005782068027793 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 27528.0 + "value": 3584.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2309228.0 + "value": 1910692.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 74.70212450091971 + "value": 48.80624262034699 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 109139973.0 + "value": 557324.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138429471.0 + "value": 138420853.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.2762158037647 + "value": 1.2615769607938871 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012735454857528214 + "value": 0.059424016471594565 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -12477,13 +12477,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.08386443727893 + "value": 90.09322937594767 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.82728539748061 + "value": 100.05288590272083 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -12495,7 +12495,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -12507,7 +12507,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3461873664.0 + "value": 3982491648.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -12519,31 +12519,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2218786816.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 38797312.0 + "value": 155189248.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 352927744.0 + "value": 395247616.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.402098991329166 + "value": 10.602771476571903 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -12555,13 +12555,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.774326417722506 + "value": 20.272676234472005 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.188753952521338 + "value": 10.977733371108132 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -12573,7 +12573,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.369212426757887 + "value": 29.849980026461516 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -12592,36 +12592,36 @@ "time" ], "times": { - "compilation": 135374.874, - "data": 61325.446, - "framework": 866369.596, - "kernel_overhead": 346332.829, - "profiling_overhead": 51428.395, - "profiling_runs": 407282.926, + "compilation_time": 24541.867, + "data": 78103.696, + "framework": 940651.856, + "kernel_overhead": 372049.397, + "profiling_overhead": 65245.973, + "profiling_runs": 425252.79, "runtimes": [ - 9279.456 + 8113.408 ], - "search_algorithm": 42.718, - "validation": 31.837 + "search_algorithm": 30.921, + "validation": 29.267 }, - "timestamp": "2026-03-02 14:27:31 UTC" + "timestamp": "2026-03-05 08:58:37 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, - "z": 8 + "x": 8, + "y": 128, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, + "private_memory_size": 64, "registers": 32 }, "configuration": { @@ -12629,10 +12629,10 @@ "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -12640,61 +12640,61 @@ { "name": "time", "unit": "", - "value": 9508.896 + "value": 8132.608 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.231955657671571 + "value": 8.38922658769437 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8092.0 + "value": 14632.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2305880.0 + "value": 2003228.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 77.20380121387784 + "value": 53.679918617497535 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 121061452.0 + "value": 15043916.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138448087.0 + "value": 138424825.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.27182010814509167 + "value": 0.6331998246094167 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01256859051542329 + "value": 0.02959697461498221 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -12724,13 +12724,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.23530513627956 + "value": 88.89016713988607 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.51294660713825 + "value": 99.60706240705633 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -12742,7 +12742,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -12754,7 +12754,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3461873664.0 + "value": 3635412992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -12766,31 +12766,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2218786816.0 + "value": 2252341248.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 38797312.0 + "value": 77594624.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 352927744.0 + "value": 367034368.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.252872710214968 + "value": 9.166390636934457 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -12802,13 +12802,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.419350628256574 + "value": 20.2845737195403 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.005242909016918 + "value": 10.652372575862106 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -12820,7 +12820,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.902517853950734 + "value": 27.735547038133845 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -12839,19 +12839,19 @@ "time" ], "times": { - "compilation": 31465.696, - "data": 60791.522, - "framework": 869611.179, - "kernel_overhead": 348759.284, - "profiling_overhead": 50714.168, - "profiling_runs": 409346.205, + "compilation_time": 23323.227, + "data": 80089.145, + "framework": 914847.747, + "kernel_overhead": 357593.026, + "profiling_overhead": 66368.003, + "profiling_runs": 410797.573, "runtimes": [ - 9508.896 + 8132.608 ], - "search_algorithm": 40.48, - "validation": 31.964 + "search_algorithm": 43.786, + "validation": 35.064 }, - "timestamp": "2026-03-02 14:27:32 UTC" + "timestamp": "2026-03-05 08:58:38 UTC" }, { "compilation_data": { @@ -12887,49 +12887,49 @@ { "name": "time", "unit": "", - "value": 9398.528 + "value": 9279.904 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.2676507097324405 + "value": 8.001251512436616 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 20564.0 + "value": 17808.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2199480.0 + "value": 2199888.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 72.92190877119107 + "value": 72.82215886795757 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 98308535.0 + "value": 98703595.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138424881.0 + "value": 138427456.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.28054295223748316 + "value": 0.28067120444000637 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -12941,7 +12941,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.013036858534680017 + "value": 0.013003059020427754 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -12971,13 +12971,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.10195353762236 + "value": 88.69117901895379 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.45355258691886 + "value": 97.97198595007198 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -13037,7 +13037,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.52621057339055 + "value": 7.547234655300931 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -13049,13 +13049,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 18.079243512960602 + "value": 18.121006473154637 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.346386264329608 + "value": 9.367976368873277 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -13067,7 +13067,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.770109192659916 + "value": 23.824967123982642 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -13086,47 +13086,47 @@ "time" ], "times": { - "compilation": 30296.05, - "data": 61171.618, - "framework": 867303.194, - "kernel_overhead": 347413.618, - "profiling_overhead": 51298.714, - "profiling_runs": 407419.244, + "compilation_time": 23771.51, + "data": 78763.167, + "framework": 895898.473, + "kernel_overhead": 346061.35, + "profiling_overhead": 65423.325, + "profiling_runs": 405650.631, "runtimes": [ - 9398.528 + 9279.904 ], - "search_algorithm": 41.653, - "validation": 29.863 + "search_algorithm": 39.446, + "validation": 35.495 }, - "timestamp": "2026-03-02 14:27:32 UTC" + "timestamp": "2026-03-05 08:58:38 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, - "z": 8 + "y": 128, + "z": 64 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, - "registers": 32 + "private_memory_size": 0, + "registers": 23 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -13134,61 +13134,61 @@ { "name": "time", "unit": "", - "value": 9509.792 + "value": 3399.264 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.8511307814775 + "value": 18.509073742158495 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5716.0 + "value": 6192.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2195852.0 + "value": 1844360.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 76.37583480007939 + "value": 1.8172343892091058 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 118220948.0 + "value": 62920.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415459.0 + "value": 2100417.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.27378220858938157 + "value": 6.060097827220842 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012624479235094637 + "value": 0.28394320581307164 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -13206,25 +13206,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.42755948731518 + "value": 98.19408973122896 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.72570417019179 + "value": 99.94514493916526 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -13236,7 +13236,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -13248,43 +13248,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3461873664.0 + "value": 4945084416.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2218786816.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 38797312.0 + "value": 1665138688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 352927744.0 + "value": 484966400.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.270002990692716 + "value": 41.3836024506563 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -13296,13 +13296,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.459102936288257 + "value": 48.48623761390381 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.025793571189059 + "value": 3.3618387408077837 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -13314,7 +13314,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.954783174504904 + "value": 87.59777216452915 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -13333,47 +13333,47 @@ "time" ], "times": { - "compilation": 30757.453, - "data": 61056.166, - "framework": 869377.564, - "kernel_overhead": 348348.615, - "profiling_overhead": 51026.657, - "profiling_runs": 408946.126, + "compilation_time": 23406.721, + "data": 77242.536, + "framework": 1386886.187, + "kernel_overhead": 608250.803, + "profiling_overhead": 63892.4, + "profiling_runs": 637500.448, "runtimes": [ - 9509.792 + 3399.264 ], - "search_algorithm": 39.567, - "validation": 30.15 + "search_algorithm": 56.615, + "validation": 28.056 }, - "timestamp": "2026-03-02 14:27:33 UTC" + "timestamp": "2026-03-05 08:58:39 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 8 + "y": 128, + "z": 32 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, - "registers": 32 + "private_memory_size": 0, + "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -13381,61 +13381,61 @@ { "name": "time", "unit": "", - "value": 8615.424 + "value": 4047.936 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.742860837925183 + "value": 15.477310173457559 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 21212.0 + "value": 7644.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2206120.0 + "value": 1838024.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 61.328921800421945 + "value": 1.5318953586647863 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 47492954.0 + "value": 75340.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138420473.0 + "value": 2103183.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.2991866994355979 + "value": 2.539986567751393 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01396465451390183 + "value": 0.11902664911553117 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -13453,25 +13453,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.62800425837503 + "value": 98.94623423626844 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.01881361884294 + "value": 99.96343575658179 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -13483,7 +13483,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -13495,43 +13495,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3461873664.0 + "value": 5463080960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2218786816.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 38797312.0 + "value": 1369440256.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 352927744.0 + "value": 596246528.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.017007825071438 + "value": 40.970317759883955 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -13543,13 +13543,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.25533906082439 + "value": 40.642623565913496 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.95438976105851 + "value": 1.4883773278532773 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -13561,7 +13561,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.316425167542377 + "value": 90.27550404250832 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -13580,47 +13580,47 @@ "time" ], "times": { - "compilation": 31116.423, - "data": 61932.267, - "framework": 866456.7849999999, - "kernel_overhead": 348112.495, - "profiling_overhead": 51884.067, - "profiling_runs": 404527.956, + "compilation_time": 22972.043, + "data": 78807.527, + "framework": 1972599.514, + "kernel_overhead": 898065.971, + "profiling_overhead": 65225.148, + "profiling_runs": 930500.868, "runtimes": [ - 8615.424 + 4047.936 ], - "search_algorithm": 52.544, - "validation": 32.881 + "search_algorithm": 44.179, + "validation": 35.973 }, - "timestamp": "2026-03-02 14:27:33 UTC" + "timestamp": "2026-03-05 08:58:40 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 64 + "x": 8, + "y": 128, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 23 + "private_memory_size": 64, + "registers": 32 }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -13628,61 +13628,61 @@ { "name": "time", "unit": "", - "value": 3352.448 + "value": 8093.376 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.463997404861429 + "value": 8.424993586454592 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 32.0 + "value": 17016.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869096.0 + "value": 2000712.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8189742325960938 + "value": 55.89334807060442 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 57792.0 + "value": 21186501.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098973.0 + "value": 138418150.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.059798653871545 + "value": 0.6337007595617721 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.28393197024190536 + "value": 0.029633363810746407 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -13700,25 +13700,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.33114496342763 + "value": 90.4125432429761 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94874611876746 + "value": 99.52415486914897 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -13730,7 +13730,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -13742,7 +13742,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4945084416.0 + "value": 1958739968.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -13754,31 +13754,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 1178599424.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1665138688.0 + "value": 143654912.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 484966400.0 + "value": 266371072.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.380837478655295 + "value": 9.026673073664881 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -13790,13 +13790,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.48257212121867 + "value": 20.32643203190243 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3615845904360606 + "value": 5.592746313465342 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -13808,7 +13808,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.59124507612215 + "value": 20.170363069145225 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -13827,47 +13827,47 @@ "time" ], "times": { - "compilation": 94634.438, - "data": 60083.152, - "framework": 1356435.458, - "kernel_overhead": 608405.435, - "profiling_overhead": 49980.133, - "profiling_runs": 637966.738, + "compilation_time": 23685.444, + "data": 78792.443, + "framework": 588432.939, + "kernel_overhead": 195792.933, + "profiling_overhead": 65366.243, + "profiling_runs": 248481.32, "runtimes": [ - 3352.448 + 8093.376 ], - "search_algorithm": 48.021, - "validation": 30.752 + "search_algorithm": 43.401, + "validation": 29.755 }, - "timestamp": "2026-03-02 14:27:34 UTC" + "timestamp": "2026-03-05 08:58:41 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, - "z": 64 + "x": 8, + "y": 128, + "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 23 + "private_memory_size": 128, + "registers": 32 }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -13875,61 +13875,61 @@ { "name": "time", "unit": "", - "value": 3396.768 + "value": 9380.8 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.36681754533704 + "value": 7.935433393469489 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6384.0 + "value": 16336.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871628.0 + "value": 2196332.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8164946909438755 + "value": 81.83186492217116 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 64414.0 + "value": 133023845.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099947.0 + "value": 138417382.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.059694514856238 + "value": 0.2742172787979654 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2839488485375063 + "value": 0.012726487829270685 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -13947,25 +13947,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.00250437119865 + "value": 99.0298395274492 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95648274626 + "value": 98.52937991741578 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -13977,7 +13977,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -13989,7 +13989,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4945084416.0 + "value": 1817706496.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -14001,31 +14001,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 1145044992.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1665138688.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 484966400.0 + "value": 252264448.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.380733644986755 + "value": 7.274085888799478 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -14037,13 +14037,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.48170138874522 + "value": 17.635245511671922 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.361524217383702 + "value": 4.708042228274718 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -14055,7 +14055,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.58970447311066 + "value": 16.573110907051056 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -14074,19 +14074,19 @@ "time" ], "times": { - "compilation": 33275.84, - "data": 60925.981, - "framework": 1360662.2230000002, - "kernel_overhead": 609757.425, - "profiling_overhead": 50514.875, - "profiling_runs": 639463.942, + "compilation_time": 23132.867, + "data": 77279.863, + "framework": 571764.99, + "kernel_overhead": 185232.692, + "profiling_overhead": 63943.809, + "profiling_runs": 245308.626, "runtimes": [ - 3396.768 + 9380.8 ], - "search_algorithm": 45.832, - "validation": 34.964 + "search_algorithm": 46.325, + "validation": 30.417 }, - "timestamp": "2026-03-02 14:27:35 UTC" + "timestamp": "2026-03-05 08:58:41 UTC" }, { "compilation_data": { @@ -14094,7 +14094,7 @@ "global_size": { "x": 8, "y": 128, - "z": 64 + "z": 32 }, "local_memory_size": 0, "local_size": { @@ -14104,17 +14104,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 23 + "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -14122,61 +14122,61 @@ { "name": "time", "unit": "", - "value": 3398.912 + "value": 2601.792 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.098026663663783 + "value": 24.50188776203581 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5796.0 + "value": 6040.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837080.0 + "value": 1840140.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8155537973196338 + "value": 2.4093822850089035 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 62741.0 + "value": 50406.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100390.0 + "value": 2100392.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.059621764124358 + "value": 4.067505455968158 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.28394497734193946 + "value": 0.19058289704615122 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -14206,13 +14206,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.406990527254 + "value": 98.72349072947185 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95566663379624 + "value": 99.94871362166752 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -14224,7 +14224,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -14236,43 +14236,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4945084416.0 + "value": 2776629248.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1665138688.0 + "value": 1642070016.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 484966400.0 + "value": 378208256.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.380419386512884 + "value": 45.254681124120026 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -14284,13 +14284,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.481436252365604 + "value": 65.08567561092036 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.361505833904256 + "value": 2.3835086283296025 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -14302,7 +14302,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.58919223139921 + "value": 91.70221661719883 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -14321,47 +14321,47 @@ "time" ], "times": { - "compilation": 29809.872, - "data": 60797.85, - "framework": 1360072.788, - "kernel_overhead": 609864.14, - "profiling_overhead": 50400.237, - "profiling_runs": 639010.561, + "compilation_time": 23469.892, + "data": 77637.294, + "framework": 1112721.398, + "kernel_overhead": 473300.913, + "profiling_overhead": 63343.769, + "profiling_runs": 498439.422, "runtimes": [ - 3398.912 + 2601.792 ], - "search_algorithm": 41.464, - "validation": 19.985 + "search_algorithm": 44.189, + "validation": 36.755 }, - "timestamp": "2026-03-02 14:27:36 UTC" + "timestamp": "2026-03-05 08:58:42 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, - "z": 64 + "y": 128, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 2, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 23 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -14369,61 +14369,61 @@ { "name": "time", "unit": "", - "value": 3403.808 + "value": 3297.536 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.142925712778224 + "value": 19.359124331550802 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 11064.0 + "value": 11464.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841700.0 + "value": 1842056.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.822262754727967 + "value": 1.9206133388378843 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 69995.0 + "value": 68513.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108500.0 + "value": 2108741.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.059354063475773 + "value": 1.5936443049944211 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.28392739425680136 + "value": 0.0746348703978352 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -14453,13 +14453,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.93001345284247 + "value": 98.70011018107981 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94844728794808 + "value": 99.90539257285859 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -14471,7 +14471,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -14483,43 +14483,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4945084416.0 + "value": 3234856960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1665138688.0 + "value": 1294991360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 484966400.0 + "value": 500957184.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.38019964626191 + "value": 45.376938233219505 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -14531,13 +14531,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.4819357058724 + "value": 50.99898702107766 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3615404639813877 + "value": 1.0334267389525011 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -14549,7 +14549,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.59012780330195 + "value": 95.17550193350114 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -14568,47 +14568,47 @@ "time" ], "times": { - "compilation": 28356.265, - "data": 61497.455, - "framework": 1357287.657, - "kernel_overhead": 607559.519, - "profiling_overhead": 51603.535, - "profiling_runs": 636627.148, + "compilation_time": 23536.751, + "data": 78036.38, + "framework": 1868428.9, + "kernel_overhead": 848790.187, + "profiling_overhead": 64279.852, + "profiling_runs": 877322.481, "runtimes": [ - 3403.808 + 3297.536 ], - "search_algorithm": 41.388, - "validation": 28.226 + "search_algorithm": 44.023, + "validation": 27.496 }, - "timestamp": "2026-03-02 14:27:37 UTC" + "timestamp": "2026-03-05 08:58:43 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 64 + "y": 128, + "z": 8 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 2, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 23 + "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -14616,61 +14616,61 @@ { "name": "time", "unit": "", - "value": 3484.384 + "value": 5451.712 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.99995281740578 + "value": 11.597336379313123 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9784.0 + "value": 3760.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841740.0 + "value": 1838424.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8153360883506287 + "value": 1.164163135028142 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 66549.0 + "value": 97656.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104896.0 + "value": 2105050.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.059620144543105 + "value": 0.476759406602004 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2839578051827152 + "value": 0.022341655697049476 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -14700,13 +14700,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.80578403456192 + "value": 73.90061101697658 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95539130017573 + "value": 99.9677684162297 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -14718,7 +14718,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -14730,43 +14730,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4945084416.0 + "value": 4706533376.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1665138688.0 + "value": 1121452032.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 484966400.0 + "value": 852869120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.38190775977563 + "value": 42.99364301998961 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -14778,13 +14778,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.4837600595052 + "value": 30.513642275209524 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.361666957250849 + "value": 0.3687561749567557 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -14796,7 +14796,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.59340897025467 + "value": 96.94793209624359 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -14815,47 +14815,47 @@ "time" ], "times": { - "compilation": 19745.86, - "data": 62509.65, - "framework": 1358657.7179999999, - "kernel_overhead": 607221.671, - "profiling_overhead": 52582.549, - "profiling_runs": 636343.848, + "compilation_time": 24764.408, + "data": 77422.538, + "framework": 3095903.091, + "kernel_overhead": 1457488.694, + "profiling_overhead": 64440.561, + "profiling_runs": 1496551.298, "runtimes": [ - 3484.384 + 5451.712 ], - "search_algorithm": 43.755, - "validation": 23.941 + "search_algorithm": 46.412, + "validation": 27.653 }, - "timestamp": "2026-03-02 14:27:37 UTC" + "timestamp": "2026-03-05 08:58:44 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 32 + "x": 8, + "y": 128, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 27 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -14863,61 +14863,61 @@ { "name": "time", "unit": "", - "value": 4072.64 + "value": 3236.864 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.117185704435762 + "value": 19.265996953919647 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16528.0 + "value": 848.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1873024.0 + "value": 1836900.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5405828752228645 + "value": 1.8952988077900839 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 87461.0 + "value": 55545.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102184.0 + "value": 2100018.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.5396878604707247 + "value": 1.5876859733886626 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1190117837937098 + "value": 0.07440067134980788 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -14947,13 +14947,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.96617049643093 + "value": 98.72689018453617 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95881382234508 + "value": 99.96462138486484 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -14965,7 +14965,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -14977,43 +14977,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5463080960.0 + "value": 2295332864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1369440256.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 596246528.0 + "value": 471597056.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.96724832328605 + "value": 42.0326671277218 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -15025,13 +15025,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.63942669472938 + "value": 50.80883376988963 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4882602549339372 + "value": 1.0295735358644627 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -15043,7 +15043,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.26848126282322 + "value": 89.26341767533044 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -15062,47 +15062,47 @@ "time" ], "times": { - "compilation": 48844.595, - "data": 63417.702, - "framework": 1939053.022, - "kernel_overhead": 894953.386, - "profiling_overhead": 53547.666, - "profiling_runs": 927134.268, + "compilation_time": 23512.381, + "data": 77507.289, + "framework": 1694643.151, + "kernel_overhead": 762262.941, + "profiling_overhead": 64409.014, + "profiling_runs": 790463.907, "runtimes": [ - 4072.64 + 3236.864 ], - "search_algorithm": 29.811, - "validation": 21.877 + "search_algorithm": 47.096, + "validation": 30.354 }, - "timestamp": "2026-03-02 14:27:38 UTC" + "timestamp": "2026-03-05 08:58:45 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, - "z": 32 + "x": 8, + "y": 128, + "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 27 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -15110,61 +15110,61 @@ { "name": "time", "unit": "", - "value": 4282.368 + "value": 6181.76 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.928254757785467 + "value": 10.173916819077595 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 364.0 + "value": 848.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870536.0 + "value": 1836200.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5173546693220548 + "value": 1.026428799161382 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 69956.0 + "value": 104275.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099167.0 + "value": 2102966.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.539770535466052 + "value": 0.41935248933971053 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11901544934433993 + "value": 0.01965289230502901 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -15194,13 +15194,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.86081955637958 + "value": 82.00415189596089 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96350169883706 + "value": 99.97307413410681 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -15212,7 +15212,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -15224,43 +15224,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5463080960.0 + "value": 6215958528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1369440256.0 + "value": 1355284480.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 596246528.0 + "value": 936706048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.96599146614326 + "value": 42.84809606037587 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -15272,13 +15272,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.638772504445626 + "value": 26.83997585637113 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4882362977702255 + "value": 0.32436005978768817 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -15290,7 +15290,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.26705663391003 + "value": 93.65858731422863 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -15309,19 +15309,19 @@ "time" ], "times": { - "compilation": 18435.673, - "data": 65158.176, - "framework": 1944635.4619999998, - "kernel_overhead": 896169.613, - "profiling_overhead": 54587.422, - "profiling_runs": 928720.251, + "compilation_time": 23230.539, + "data": 76913.032, + "framework": 3582615.74, + "kernel_overhead": 1699810.697, + "profiling_overhead": 63537.714, + "profiling_runs": 1742354.297, "runtimes": [ - 4282.368 + 6181.76 ], - "search_algorithm": 26.742, - "validation": 22.104 + "search_algorithm": 46.441, + "validation": 27.481 }, - "timestamp": "2026-03-02 14:27:39 UTC" + "timestamp": "2026-03-05 08:58:47 UTC" }, { "compilation_data": { @@ -15329,7 +15329,7 @@ "global_size": { "x": 8, "y": 128, - "z": 32 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -15339,17 +15339,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 27 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -15357,61 +15357,61 @@ { "name": "time", "unit": "", - "value": 4194.976 + "value": 5798.496 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.73957775560207 + "value": 10.71859025945962 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 11352.0 + "value": 6384.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840804.0 + "value": 1834840.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5317490750287563 + "value": 1.075070913951803 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 81991.0 + "value": 101825.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105283.0 + "value": 2100504.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.5397596779144793 + "value": 0.4409068398994503 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11901621591099004 + "value": 0.020657350237572977 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -15441,13 +15441,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.97112029932066 + "value": 82.02275930736741 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96374254350768 + "value": 99.94463645635663 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -15459,7 +15459,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -15471,43 +15471,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5463080960.0 + "value": 5813305344.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 101187584.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1369440256.0 + "value": 986185728.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 596246528.0 + "value": 897892352.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.96629499532065 + "value": 43.94866102540273 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -15519,13 +15519,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.638936342281184 + "value": 28.219792334744948 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4882422976909613 + "value": 0.34103508802975463 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -15537,7 +15537,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.26739208971271 + "value": 94.50506726623156 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -15556,19 +15556,19 @@ "time" ], "times": { - "compilation": 17039.957, - "data": 66272.676, - "framework": 1947224.668, - "kernel_overhead": 895975.045, - "profiling_overhead": 56511.645, - "profiling_runs": 928465.302, + "compilation_time": 23683.101, + "data": 78524.07, + "framework": 3485948.987, + "kernel_overhead": 1650338.161, + "profiling_overhead": 65473.719, + "profiling_runs": 1691613.037, "runtimes": [ - 4194.976 + 5798.496 ], - "search_algorithm": 23.727, - "validation": 21.614 + "search_algorithm": 48.541, + "validation": 33.342 }, - "timestamp": "2026-03-02 14:27:40 UTC" + "timestamp": "2026-03-05 08:58:49 UTC" }, { "compilation_data": { @@ -15576,7 +15576,7 @@ "global_size": { "x": 8, "y": 64, - "z": 32 + "z": 256 }, "local_memory_size": 0, "local_size": { @@ -15586,17 +15586,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 27 + "registers": 44 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "1" }, "correctness": 1, "invalidity": "correct", @@ -15604,61 +15604,61 @@ { "name": "time", "unit": "", - "value": 4120.672 + "value": 3624.96 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.722345764648278 + "value": 17.12124057908796 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 832.0 + "value": 664.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837832.0 + "value": 1837056.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5166473163703507 + "value": 1.6876085947961486 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 68945.0 + "value": 61167.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099247.0 + "value": 2098982.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.539837528305116 + "value": 22.575471708522592 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11901301714560333 + "value": 1.0579003620013563 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -15688,13 +15688,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.89204937762483 + "value": 71.60134550855534 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96224471717508 + "value": 99.96544476254897 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -15706,7 +15706,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -15718,43 +15718,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5463080960.0 + "value": 587202560.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 117440512.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1369440256.0 + "value": 234881024.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 50331648.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 596246528.0 + "value": 341311488.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.96601866125311 + "value": 40.12501015969425 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -15766,13 +15766,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.638453015237516 + "value": 45.152684729481656 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4882245977259834 + "value": 11.993681881268566 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -15784,7 +15784,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.26634698278957 + "value": 57.411629162816965 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -15803,47 +15803,47 @@ "time" ], "times": { - "compilation": 15562.202, - "data": 65757.108, - "framework": 1946124.5209999997, - "kernel_overhead": 896283.391, - "profiling_overhead": 55884.839, - "profiling_runs": 928199.183, + "compilation_time": 23596.529, + "data": 77142.193, + "framework": 282727.81799999997, + "kernel_overhead": 55563.932, + "profiling_overhead": 64250.378, + "profiling_runs": 85771.315, "runtimes": [ - 4120.672 + 3624.96 ], - "search_algorithm": 23.878, - "validation": 23.571 + "search_algorithm": 45.635, + "validation": 27.32 }, - "timestamp": "2026-03-02 14:27:41 UTC" + "timestamp": "2026-03-05 08:58:49 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 32 + "y": 64, + "z": 128 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 27 + "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -15851,61 +15851,61 @@ { "name": "time", "unit": "", - "value": 4162.56 + "value": 2133.312 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.659318235004559 + "value": 29.95872224763014 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8476.0 + "value": 4916.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840912.0 + "value": 1839736.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5186864768902628 + "value": 2.932265842410314 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 78395.0 + "value": 39388.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108799.0 + "value": 2099746.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.5330136797887377 + "value": 19.913038930848913 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11870473886744588 + "value": 0.9328880326533587 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -15935,13 +15935,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.09285254553501 + "value": 94.21472624907025 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95913079425816 + "value": 99.92750423981782 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -15953,7 +15953,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -15965,43 +15965,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5463080960.0 + "value": 704643072.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 142606336.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1369440256.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 596246528.0 + "value": 256901120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.86027404219632 + "value": 50.490437537584356 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -16013,13 +16013,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.53445030798089 + "value": 79.66419861946883 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4844159048332846 + "value": 10.735995517076855 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -16031,7 +16031,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.03532233059875 + "value": 76.24213631397151 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -16050,47 +16050,47 @@ "time" ], "times": { - "compilation": 15915.696, - "data": 63089.127, - "framework": 1940546.814, - "kernel_overhead": 895968.485, - "profiling_overhead": 53221.368, - "profiling_runs": 928267.834, + "compilation_time": 23950.702, + "data": 77961.772, + "framework": 294393.065, + "kernel_overhead": 64428.428, + "profiling_overhead": 64746.212, + "profiling_runs": 87256.653, "runtimes": [ - 4162.56 + 2133.312 ], - "search_algorithm": 25.227, - "validation": 16.143 + "search_algorithm": 42.817, + "validation": 29.385 }, - "timestamp": "2026-03-02 14:27:42 UTC" + "timestamp": "2026-03-05 08:58:49 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 64, - "z": 16 + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 32 + "private_memory_size": 0, + "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -16098,61 +16098,61 @@ { "name": "time", "unit": "", - "value": 8098.688 + "value": 1844.96 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.506627165331737 + "value": 34.02317096285065 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12932.0 + "value": 432.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2018868.0 + "value": 1837712.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 56.748502326790195 + "value": 3.330012303596684 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 24108366.0 + "value": 32434.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138416940.0 + "value": 2099170.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6315267907101765 + "value": 11.292922785377108 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029544178621103364 + "value": 0.528496466690594 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -16170,25 +16170,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.50131223155846 + "value": 95.87232169720669 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.81901433149925 + "value": 99.8199415107782 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -16200,7 +16200,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -16212,43 +16212,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1958739968.0 + "value": 390070272.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 71303168.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1178599424.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 143654912.0 + "value": 88080384.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 266371072.0 + "value": 197394432.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.973244812625493 + "value": 45.84133442200045 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -16260,13 +16260,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.20539480753887 + "value": 90.35943013997418 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.559443346703199 + "value": 6.265155800720866 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -16278,7 +16278,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.050312164295043 + "value": 66.44732960844479 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -16297,47 +16297,47 @@ "time" ], "times": { - "compilation": 52489.988, - "data": 65656.503, - "framework": 563246.879, - "kernel_overhead": 194829.158, - "profiling_overhead": 55941.122, - "profiling_runs": 246820.096, + "compilation_time": 23539.934, + "data": 77830.388, + "framework": 240272.68800000002, + "kernel_overhead": 38048.799, + "profiling_overhead": 64598.81, + "profiling_runs": 59794.691, "runtimes": [ - 8098.688 + 1844.96 ], - "search_algorithm": 40.751, - "validation": 18.608 + "search_algorithm": 38.978, + "validation": 28.529 }, - "timestamp": "2026-03-02 14:27:43 UTC" + "timestamp": "2026-03-05 08:58:49 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, - "z": 16 + "x": 8, + "y": 64, + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 32 + "private_memory_size": 0, + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -16345,61 +16345,61 @@ { "name": "time", "unit": "", - "value": 8161.6 + "value": 1827.168 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.439562550306105 + "value": 34.963153825549114 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9016.0 + "value": 112.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2018624.0 + "value": 1837708.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 57.160817003935385 + "value": 3.4202413703640913 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 25645502.0 + "value": 30950.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138417762.0 + "value": 2099159.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6315664294919627 + "value": 5.798459706722612 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029642669774000697 + "value": 0.2715981955783292 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -16417,25 +16417,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.76967295177583 + "value": 98.32521897552597 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.07676596301671 + "value": 99.91380112759225 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -16447,7 +16447,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -16459,43 +16459,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1958739968.0 + "value": 362807296.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 69206016.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1178599424.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 143654912.0 + "value": 127926272.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 266371072.0 + "value": 173801472.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.979819664862031 + "value": 42.63274187476548 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -16507,13 +16507,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.220540073403278 + "value": 92.7854974766525 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.563610513360716 + "value": 3.3979064017328793 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -16525,7 +16525,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.065362804526647 + "value": 60.07617319170131 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -16544,45 +16544,45 @@ "time" ], "times": { - "compilation": 18703.241, - "data": 65461.716, - "framework": 563504.991, - "kernel_overhead": 195130.064, - "profiling_overhead": 55374.945, - "profiling_runs": 247538.266, + "compilation_time": 23490.323, + "data": 77227.873, + "framework": 228069.61599999998, + "kernel_overhead": 32559.838, + "profiling_overhead": 64309.661, + "profiling_runs": 53972.244, "runtimes": [ - 8161.6 + 1827.168 ], - "search_algorithm": 29.378, - "validation": 23.269 + "search_algorithm": 33.564, + "validation": 26.993 }, - "timestamp": "2026-03-02 14:27:43 UTC" + "timestamp": "2026-03-05 08:58:50 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, + "y": 64, "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 32 + "private_memory_size": 0, + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "16" }, @@ -16592,49 +16592,49 @@ { "name": "time", "unit": "", - "value": 8129.952 + "value": 1781.696 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.414574575106736 + "value": 36.1980473454436 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7648.0 + "value": 5712.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2000120.0 + "value": 1840956.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 55.811780923940134 + "value": 3.5556317230549723 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 20825641.0 + "value": 36716.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138416422.0 + "value": 2103924.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.633332620218868 + "value": 2.996106443989278 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -16646,7 +16646,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029646370764779623 + "value": 0.14031685253587198 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -16664,25 +16664,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.30141482992077 + "value": 98.33963732657661 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.56854139650764 + "value": 99.89617118153573 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -16694,7 +16694,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -16706,25 +16706,25 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1958739968.0 + "value": 209715200.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 67108864.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1178599424.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 143654912.0 + "value": 160432128.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -16736,13 +16736,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 266371072.0 + "value": 163381248.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.026804751904827 + "value": 39.53978342448487 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -16754,13 +16754,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.32628862982027 + "value": 95.88919861978373 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.592706856886583 + "value": 1.943067257188 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -16772,7 +16772,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.17027763483842 + "value": 58.363505030581685 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -16791,19 +16791,19 @@ "time" ], "times": { - "compilation": 19730.875, - "data": 63840.585, - "framework": 560470.9450000001, - "kernel_overhead": 195072.837, - "profiling_overhead": 54116.453, - "profiling_runs": 247441.07, + "compilation_time": 24030.089, + "data": 77746.303, + "framework": 235694.718, + "kernel_overhead": 36053.932, + "profiling_overhead": 64475.073, + "profiling_runs": 57419.41, "runtimes": [ - 8129.952 + 1781.696 ], - "search_algorithm": 32.757, - "validation": 20.267 + "search_algorithm": 37.714, + "validation": 26.897 }, - "timestamp": "2026-03-02 14:27:44 UTC" + "timestamp": "2026-03-05 08:58:50 UTC" }, { "compilation_data": { @@ -16811,7 +16811,7 @@ "global_size": { "x": 8, "y": 64, - "z": 16 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -16820,18 +16820,18 @@ "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 32 + "private_memory_size": 0, + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -16839,61 +16839,61 @@ { "name": "time", "unit": "", - "value": 8442.944 + "value": 1753.888 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.384312393957191 + "value": 35.70606733488811 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6240.0 + "value": 5092.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1997384.0 + "value": 1837044.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 56.25754447334731 + "value": 3.5303643473386868 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 22524185.0 + "value": 34342.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415408.0 + "value": 2103624.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6337465186055217 + "value": 1.4943946447970224 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02974046362885063 + "value": 0.07000559794112443 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -16911,25 +16911,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.01492774753636 + "value": 81.69389834862477 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.11668829917512 + "value": 99.92762976188185 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -16941,7 +16941,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -16953,43 +16953,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1958739968.0 + "value": 138412032.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 33554432.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1178599424.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 143654912.0 + "value": 82837504.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 266371072.0 + "value": 150192128.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.005868179863894 + "value": 37.93515283515334 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -17001,13 +17001,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.279159764013066 + "value": 95.65019866548326 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.57973951514715 + "value": 1.1559289145364797 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -17019,7 +17019,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.12353263358683 + "value": 53.51835083010823 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -17038,47 +17038,47 @@ "time" ], "times": { - "compilation": 20043.002, - "data": 64608.091, - "framework": 563132.9750000001, - "kernel_overhead": 195601.353, - "profiling_overhead": 54670.324, - "profiling_runs": 248253.207, + "compilation_time": 23732.615, + "data": 78067.903, + "framework": 214001.34500000003, + "kernel_overhead": 25279.917, + "profiling_overhead": 64145.735, + "profiling_runs": 46507.79, "runtimes": [ - 8442.944 + 1753.888 ], - "search_algorithm": 34.237, - "validation": 20.391 + "search_algorithm": 40.512, + "validation": 29.52 }, - "timestamp": "2026-03-02 14:27:44 UTC" + "timestamp": "2026-03-05 08:58:50 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 16 + "y": 64, + "z": 128 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 32 + "private_memory_size": 0, + "registers": 19 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -17086,61 +17086,61 @@ { "name": "time", "unit": "", - "value": 8083.616 + "value": 5210.816 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.47670988882817 + "value": 12.066314593221472 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 25528.0 + "value": 9412.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2007276.0 + "value": 1838908.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 53.279410575425324 + "value": 1.2096921341787783 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 12913600.0 + "value": 94728.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138455099.0 + "value": 2100770.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6389684835840983 + "value": 7.955461407999722 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.030089848611183793 + "value": 0.37280334661389636 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -17158,25 +17158,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.0573153251347 + "value": 97.35592517258348 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.37525519010097 + "value": 99.96727911504107 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -17188,7 +17188,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -17200,43 +17200,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1958739968.0 + "value": 8212447232.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1178599424.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 143654912.0 + "value": 3321888768.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 266371072.0 + "value": 715390976.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.088220788894262 + "value": 36.266811313524435 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -17248,13 +17248,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.464542394435934 + "value": 31.822965000154042 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.630746894172192 + "value": 4.288641767598884 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -17266,7 +17266,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.30748893394529 + "value": 84.80990822108174 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -17285,47 +17285,47 @@ "time" ], "times": { - "compilation": 22836.567, - "data": 63281.837, - "framework": 559219.025, - "kernel_overhead": 195251.196, - "profiling_overhead": 53367.409, - "profiling_runs": 247318.583, + "compilation_time": 23335.297, + "data": 78746.469, + "framework": 2148611.875, + "kernel_overhead": 982973.6, + "profiling_overhead": 65515.478, + "profiling_runs": 1021376.328, "runtimes": [ - 8083.616 + 5210.816 ], - "search_algorithm": 33.237, - "validation": 19.548 + "search_algorithm": 57.373, + "validation": 29.206 }, - "timestamp": "2026-03-02 14:27:44 UTC" + "timestamp": "2026-03-05 08:58:51 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 64, - "z": 8 + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, - "registers": 32 + "private_memory_size": 0, + "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -17333,61 +17333,61 @@ { "name": "time", "unit": "", - "value": 9364.0 + "value": 5781.28 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.336380536467666 + "value": 11.072896629110561 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 22536.0 + "value": 13904.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2276680.0 + "value": 1842856.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.75423408396382 + "value": 1.1183377259524692 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 132964803.0 + "value": 112627.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138426900.0 + "value": 2113504.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.27442612796692684 + "value": 3.6249279096530853 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012773389570065876 + "value": 0.1698645579666433 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -17405,25 +17405,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.98392110434115 + "value": 98.69086269060774 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.99088416263311 + "value": 99.96272495122425 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -17435,7 +17435,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -17447,43 +17447,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1817706496.0 + "value": 10049552384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1145044992.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 72351744.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 252264448.0 + "value": 786956288.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.266784923168747 + "value": 33.82979358778209 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -17495,13 +17495,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.617717739555733 + "value": 29.001028040321287 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.703362877979539 + "value": 2.0108134676394642 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -17513,7 +17513,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.556689857643583 + "value": 85.02099289599884 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -17532,47 +17532,47 @@ "time" ], "times": { - "compilation": 101026.478, - "data": 62237.431, - "framework": 542985.752, - "kernel_overhead": 184728.898, - "profiling_overhead": 52451.488, - "profiling_runs": 243567.935, + "compilation_time": 23477.497, + "data": 78872.385, + "framework": 2737648.452, + "kernel_overhead": 1276075.27, + "profiling_overhead": 65816.104, + "profiling_runs": 1316884.693, "runtimes": [ - 9364.0 + 5781.28 ], - "search_algorithm": 33.41, - "validation": 19.369 + "search_algorithm": 47.14, + "validation": 32.859 }, - "timestamp": "2026-03-02 14:27:45 UTC" + "timestamp": "2026-03-05 08:58:53 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, - "z": 8 + "x": 8, + "y": 64, + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, - "registers": 32 + "private_memory_size": 32, + "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -17580,61 +17580,61 @@ { "name": "time", "unit": "", - "value": 9450.176 + "value": 8029.343 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.211148825520924 + "value": 7.961592813766259 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2176.0 + "value": 6188.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2275656.0 + "value": 1910240.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.65517343856614 + "value": 49.18159785300471 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 132932783.0 + "value": 889462.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138422298.0 + "value": 138415206.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.27569904256097133 + "value": 1.2856761259097234 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012853284092535864 + "value": 0.06000813391502677 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -17664,13 +17664,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.8072437073128 + "value": 97.56317908700358 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.5511554326265 + "value": 100.26879055359261 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -17682,7 +17682,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -17694,43 +17694,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1817706496.0 + "value": 3982491648.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1145044992.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 72351744.0 + "value": 155189248.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 252264448.0 + "value": 395247616.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.2715368546293915 + "value": 10.682569914067033 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -17742,13 +17742,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.628140163796484 + "value": 20.427868196317085 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.706145329372914 + "value": 11.061770424665843 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -17760,7 +17760,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.566502715796112 + "value": 30.078553575300827 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -17779,47 +17779,47 @@ "time" ], "times": { - "compilation": 21008.762, - "data": 63786.551, - "framework": 546074.348, - "kernel_overhead": 184373.658, - "profiling_overhead": 53722.034, - "profiling_runs": 244192.105, + "compilation_time": 23490.085, + "data": 79529.533, + "framework": 949437.9199999999, + "kernel_overhead": 375216.291, + "profiling_overhead": 66476.082, + "profiling_runs": 428216.014, "runtimes": [ - 9450.176 + 8029.343 ], - "search_algorithm": 31.449, - "validation": 21.397 + "search_algorithm": 41.515, + "validation": 34.907 }, - "timestamp": "2026-03-02 14:27:45 UTC" + "timestamp": "2026-03-05 08:58:53 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, - "z": 8 + "y": 64, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, + "private_memory_size": 64, "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -17827,61 +17827,61 @@ { "name": "time", "unit": "", - "value": 9464.832 + "value": 8130.144 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.031319363749591 + "value": 8.257344957358661 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 18116.0 + "value": 6876.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2200900.0 + "value": 1998372.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.74625845698984 + "value": 53.99939718695646 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 133035314.0 + "value": 15892944.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425288.0 + "value": 138415583.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.27589867023953535 + "value": 0.6333093117023759 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012816380936884957 + "value": 0.029740213139162266 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -17911,13 +17911,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 99.04167374364971 + "value": 90.08222304923612 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.25333707770244 + "value": 100.1380743639169 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -17929,7 +17929,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -17941,43 +17941,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1817706496.0 + "value": 3635412992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1145044992.0 + "value": 2252341248.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 72351744.0 + "value": 77594624.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 252264448.0 + "value": 367034368.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.271879605390187 + "value": 9.161619747373893 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -17989,13 +17989,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.630270800998638 + "value": 20.27465806450921 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.7067141408427755 + "value": 10.647165404482253 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -18007,7 +18007,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.56848625359018 + "value": 27.722054551437747 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -18026,19 +18026,19 @@ "time" ], "times": { - "compilation": 21392.577, - "data": 64133.201, - "framework": 547006.1359999999, - "kernel_overhead": 184036.174, - "profiling_overhead": 54181.872, - "profiling_runs": 244654.889, + "compilation_time": 24488.927, + "data": 80663.798, + "framework": 926832.254, + "kernel_overhead": 362888.094, + "profiling_overhead": 67083.902, + "profiling_runs": 416196.46, "runtimes": [ - 9464.832 + 8130.144 ], - "search_algorithm": 34.309, - "validation": 26.334 + "search_algorithm": 43.027, + "validation": 28.772 }, - "timestamp": "2026-03-02 14:27:45 UTC" + "timestamp": "2026-03-05 08:58:54 UTC" }, { "compilation_data": { @@ -18059,7 +18059,7 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", @@ -18074,49 +18074,49 @@ { "name": "time", "unit": "", - "value": 9506.784 + "value": 9525.088 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.048709863782219 + "value": 7.888665007196603 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 22892.0 + "value": 18516.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2201748.0 + "value": 2198368.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.78330468492516 + "value": 76.4585764634975 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 132861882.0 + "value": 118029063.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138427558.0 + "value": 138425319.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.27481009626865704 + "value": 0.27324321451550376 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -18128,7 +18128,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012800478417880868 + "value": 0.01260033173080384 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -18158,13 +18158,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.82360822107498 + "value": 94.80549509476032 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.96884238496331 + "value": 97.58778765604728 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -18188,25 +18188,25 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1817706496.0 + "value": 3461873664.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1145044992.0 + "value": 2218786816.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 72351744.0 + "value": 38797312.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -18218,13 +18218,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 252264448.0 + "value": 352927744.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.284090830425568 + "value": 7.340224717934657 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -18236,13 +18236,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.65901211470774 + "value": 17.628899410814856 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.714387145369363 + "value": 9.113572876562612 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -18254,7 +18254,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.595515433749398 + "value": 23.178013259556447 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -18273,37 +18273,37 @@ "time" ], "times": { - "compilation": 23306.464, - "data": 63885.66, - "framework": 547191.534, - "kernel_overhead": 184414.324, - "profiling_overhead": 53996.324, - "profiling_runs": 244895.226, + "compilation_time": 23382.441, + "data": 80671.0, + "framework": 905944.874, + "kernel_overhead": 348583.718, + "profiling_overhead": 66973.521, + "profiling_runs": 409716.635, "runtimes": [ - 9506.784 + 9525.088 ], - "search_algorithm": 57.969, - "validation": 24.353 + "search_algorithm": 46.722, + "validation": 29.059 }, - "timestamp": "2026-03-02 14:27:46 UTC" + "timestamp": "2026-03-05 08:58:54 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 8 + "y": 64, + "z": 64 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, - "registers": 32 + "private_memory_size": 0, + "registers": 23 }, "configuration": { "INNER_UNROLL_FACTOR": "2", @@ -18311,9 +18311,9 @@ "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -18321,61 +18321,61 @@ { "name": "time", "unit": "", - "value": 9561.728 + "value": 3433.952 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.208678205063782 + "value": 18.3361419823811 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 18992.0 + "value": 6512.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2195120.0 + "value": 1837124.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 79.24084075612618 + "value": 1.8161804646778121 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 118650667.0 + "value": 63248.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425053.0 + "value": 2100390.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.28122076679292357 + "value": 6.060310164537633 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.013053688371277662 + "value": 0.28397588423545594 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -18393,25 +18393,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.59199939189199 + "value": 98.24190216516125 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.13897291746345 + "value": 99.95597920982462 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -18423,7 +18423,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -18435,7 +18435,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1817706496.0 + "value": 4945084416.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -18447,31 +18447,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1145044992.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 72351744.0 + "value": 1665138688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 252264448.0 + "value": 484966400.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.415427416264181 + "value": 41.385627022334006 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -18483,13 +18483,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.977426366005474 + "value": 48.48656174379297 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.799393489069089 + "value": 3.3618612146575204 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -18501,7 +18501,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.894746266247715 + "value": 87.59846233728926 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -18520,44 +18520,44 @@ "time" ], "times": { - "compilation": 23702.051, - "data": 63437.459, - "framework": 547096.266, - "kernel_overhead": 185282.208, - "profiling_overhead": 53521.15, - "profiling_runs": 244855.449, + "compilation_time": 23240.691, + "data": 79023.44, + "framework": 1407640.5980000002, + "kernel_overhead": 617034.707, + "profiling_overhead": 65232.126, + "profiling_runs": 646350.325, "runtimes": [ - 9561.728 + 3433.952 ], - "search_algorithm": 31.338, - "validation": 21.894 + "search_algorithm": 45.81, + "validation": 25.12 }, - "timestamp": "2026-03-02 14:27:46 UTC" + "timestamp": "2026-03-05 08:58:55 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 64, "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 31 + "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "8" @@ -18568,49 +18568,49 @@ { "name": "time", "unit": "", - "value": 2522.72 + "value": 4063.68 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.41722347793904 + "value": 15.429209512257955 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7492.0 + "value": 948.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872816.0 + "value": 1838984.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.4284634417978754 + "value": 1.5275888398072635 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 51970.0 + "value": 68848.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105019.0 + "value": 2100239.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.067078425440278 + "value": 2.539983399135371 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -18622,7 +18622,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.19053778623406362 + "value": 0.11902843043656156 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -18652,13 +18652,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.74619851322049 + "value": 98.84177420552778 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9395668002173 + "value": 99.96422920446038 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -18682,13 +18682,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2776629248.0 + "value": 5463080960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -18700,7 +18700,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1642070016.0 + "value": 1369440256.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -18712,13 +18712,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 378208256.0 + "value": 596246528.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.24830711797154 + "value": 40.97043235080748 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -18730,13 +18730,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.07622534650174 + "value": 40.64290921430275 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.3831625493103665 + "value": 1.4883877886097197 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -18748,7 +18748,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.6890246137801 + "value": 90.2762260385089 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -18767,47 +18767,47 @@ "time" ], "times": { - "compilation": 88094.606, - "data": 61055.813, - "framework": 1083889.696, - "kernel_overhead": 473577.346, - "profiling_overhead": 51097.097, - "profiling_runs": 498159.44, + "compilation_time": 23662.394, + "data": 78549.809, + "framework": 1990969.145, + "kernel_overhead": 907576.984, + "profiling_overhead": 65052.088, + "profiling_runs": 939790.264, "runtimes": [ - 2522.72 + 4063.68 ], - "search_algorithm": 30.617, - "validation": 20.072 + "search_algorithm": 47.174, + "validation": 24.99 }, - "timestamp": "2026-03-02 14:27:47 UTC" + "timestamp": "2026-03-05 08:58:56 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, - "z": 32 + "x": 8, + "y": 64, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 31 + "private_memory_size": 64, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -18815,61 +18815,61 @@ { "name": "time", "unit": "", - "value": 2627.744 + "value": 8103.072 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.47076262104095 + "value": 8.418377557664186 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3416.0 + "value": 19748.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871724.0 + "value": 1999348.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.4052176429905185 + "value": 56.34887469636585 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 48000.0 + "value": 22681546.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102113.0 + "value": 138421239.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.066713349359005 + "value": 0.6350268317846489 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.19056278441259075 + "value": 0.02958363988120223 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -18887,25 +18887,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.55120653212393 + "value": 92.07325080131201 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95116930420934 + "value": 98.68810502702553 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -18917,7 +18917,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -18929,43 +18929,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2776629248.0 + "value": 1958739968.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 1178599424.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1642070016.0 + "value": 143654912.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 378208256.0 + "value": 266371072.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.24884835041067 + "value": 9.088033249189072 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -18977,13 +18977,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.07720806633091 + "value": 20.4642340837701 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.3831985375853604 + "value": 5.630662063576392 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -18995,7 +18995,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.69045091324125 + "value": 20.307171706573023 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -19014,47 +19014,47 @@ "time" ], "times": { - "compilation": 20384.524, - "data": 63645.69, - "framework": 1091409.4139999999, - "kernel_overhead": 474465.035, - "profiling_overhead": 53927.087, - "profiling_runs": 499371.602, + "compilation_time": 25607.037, + "data": 78557.001, + "framework": 591092.995, + "kernel_overhead": 197488.283, + "profiling_overhead": 64432.515, + "profiling_runs": 250615.196, "runtimes": [ - 2627.744 + 8103.072 ], - "search_algorithm": 26.466, - "validation": 18.94 + "search_algorithm": 39.189, + "validation": 34.171 }, - "timestamp": "2026-03-02 14:27:47 UTC" + "timestamp": "2026-03-05 08:58:56 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, - "z": 32 + "y": 64, + "z": 8 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 31 + "private_memory_size": 128, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -19062,61 +19062,61 @@ { "name": "time", "unit": "", - "value": 2659.488 + "value": 9451.328 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.77755386862837 + "value": 7.950785726052974 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2948.0 + "value": 22116.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839788.0 + "value": 2201804.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.4102156561897363 + "value": 81.75745871606856 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 48822.0 + "value": 132964114.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103396.0 + "value": 138457915.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.067060008388745 + "value": 0.2746642663928427 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1905566967815411 + "value": 0.012739735477556885 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -19134,25 +19134,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.7558574857234 + "value": 98.81132399471615 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94967511308295 + "value": 98.73398353830851 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -19164,7 +19164,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -19176,43 +19176,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2776629248.0 + "value": 1817706496.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 1145044992.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1642070016.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 378208256.0 + "value": 252264448.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.24841285466676 + "value": 7.2663578775552695 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -19224,13 +19224,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.07610197616224 + "value": 17.61701987706072 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.3831580313535974 + "value": 4.7031765711830795 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -19242,7 +19242,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.68884909438201 + "value": 16.556038092918072 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -19261,19 +19261,19 @@ "time" ], "times": { - "compilation": 19033.148, - "data": 63297.751, - "framework": 1091123.362, - "kernel_overhead": 474731.913, - "profiling_overhead": 53464.738, - "profiling_runs": 499628.96, + "compilation_time": 23279.235, + "data": 79947.019, + "framework": 579298.1980000001, + "kernel_overhead": 186136.893, + "profiling_overhead": 66410.338, + "profiling_runs": 246803.948, "runtimes": [ - 2659.488 + 9451.328 ], - "search_algorithm": 30.298, - "validation": 17.757 + "search_algorithm": 40.862, + "validation": 27.742 }, - "timestamp": "2026-03-02 14:27:48 UTC" + "timestamp": "2026-03-05 08:58:57 UTC" }, { "compilation_data": { @@ -19309,49 +19309,49 @@ { "name": "time", "unit": "", - "value": 2677.024 + "value": 2546.048 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.787851813620446 + "value": 24.708048327776442 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 860.0 + "value": 424.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839616.0 + "value": 1839784.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.408801762142962 + "value": 2.412894620607961 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 46787.0 + "value": 43747.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103042.0 + "value": 2099167.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.0668536857141415 + "value": 4.06746534039022 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -19363,7 +19363,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.19056256276871525 + "value": 0.19058351778275434 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -19393,13 +19393,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.54585378086554 + "value": 98.51582017087915 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95064959998611 + "value": 99.94884262156563 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -19459,7 +19459,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.24949840721867 + "value": 45.25460766142512 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -19471,13 +19471,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.07747075052244 + "value": 65.0858035940318 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.3832081573677653 + "value": 2.3835133152111254 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -19489,7 +19489,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.69082102180438 + "value": 91.70252956606136 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -19508,37 +19508,37 @@ "time" ], "times": { - "compilation": 17508.109, - "data": 62891.697, - "framework": 1089749.7880000002, - "kernel_overhead": 474498.338, - "profiling_overhead": 53103.986, - "profiling_runs": 499255.767, + "compilation_time": 23288.119, + "data": 78376.159, + "framework": 1124358.2689999999, + "kernel_overhead": 478440.128, + "profiling_overhead": 64451.465, + "profiling_runs": 503090.517, "runtimes": [ - 2677.024 + 2546.048 ], - "search_algorithm": 43.959, - "validation": 22.564 + "search_algorithm": 44.897, + "validation": 24.582 }, - "timestamp": "2026-03-02 14:27:49 UTC" + "timestamp": "2026-03-05 08:58:57 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 32 + "y": 64, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 31 + "registers": 38 }, "configuration": { "INNER_UNROLL_FACTOR": "4", @@ -19546,9 +19546,9 @@ "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -19556,61 +19556,61 @@ { "name": "time", "unit": "", - "value": 2670.336 + "value": 3253.888 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.81252286924022 + "value": 19.34830898304631 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3024.0 + "value": 4792.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839628.0 + "value": 1844572.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.4131726765850634 + "value": 1.939106682011603 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 47381.0 + "value": 59035.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103412.0 + "value": 2135552.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.078397215440323 + "value": 1.5936550347425433 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.19110834519660264 + "value": 0.07468131898586562 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -19640,13 +19640,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.31901853464124 + "value": 98.60052762610508 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95679435813575 + "value": 99.96615907771245 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -19658,7 +19658,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -19670,7 +19670,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2776629248.0 + "value": 3234856960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -19682,31 +19682,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1642070016.0 + "value": 1294991360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 378208256.0 + "value": 500957184.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.3758426293719 + "value": 45.37823538399237 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -19718,13 +19718,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.25984442843861 + "value": 50.999705865179635 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.3898868809242657 + "value": 1.0334413053735132 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -19736,7 +19736,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.94779660860698 + "value": 95.1769513707578 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -19755,47 +19755,47 @@ "time" ], "times": { - "compilation": 16067.589, - "data": 62094.933, - "framework": 1086389.9849999999, - "kernel_overhead": 473724.098, - "profiling_overhead": 52114.782, - "profiling_runs": 498456.172, + "compilation_time": 23768.22, + "data": 77864.859, + "framework": 1877416.517, + "kernel_overhead": 853598.773, + "profiling_overhead": 63835.132, + "profiling_runs": 882117.753, "runtimes": [ - 2670.336 + 3253.888 ], - "search_algorithm": 26.455, - "validation": 19.787 + "search_algorithm": 46.577, + "validation": 27.652 }, - "timestamp": "2026-03-02 14:27:49 UTC" + "timestamp": "2026-03-05 08:58:58 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 64, - "z": 16 + "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 56 }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -19803,61 +19803,61 @@ { "name": "time", "unit": "", - "value": 3213.504 + "value": 5451.936 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.16122796164553 + "value": 11.573961335351227 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9960.0 + "value": 680.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872264.0 + "value": 1839628.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9162748089835022 + "value": 1.1631086606762437 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 65510.0 + "value": 91197.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104996.0 + "value": 2103107.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5934781092340113 + "value": 0.4767430909031349 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07466985115432917 + "value": 0.022331221814056303 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -19887,13 +19887,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.69688563333997 + "value": 73.86509127756061 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96559762678942 + "value": 99.92342201876644 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -19905,7 +19905,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -19917,7 +19917,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3234856960.0 + "value": 4706533376.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -19929,31 +19929,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 83886080.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1294991360.0 + "value": 1121452032.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 500957184.0 + "value": 852869120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.37119267639875 + "value": 42.9925449099325 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -19965,13 +19965,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.99216090152354 + "value": 30.5129277008401 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0332884167056773 + "value": 0.3687475393534143 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -19983,7 +19983,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.16285917962016 + "value": 96.94569539701739 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20002,32 +20002,32 @@ "time" ], "times": { - "compilation": 54340.671, - "data": 64163.176, - "framework": 1842479.9309999999, - "kernel_overhead": 848203.714, - "profiling_overhead": 53723.112, - "profiling_runs": 876389.929, + "compilation_time": 23034.957, + "data": 79816.431, + "framework": 3117125.7630000003, + "kernel_overhead": 1466114.806, + "profiling_overhead": 65633.463, + "profiling_runs": 1505561.063, "runtimes": [ - 3213.504 + 5451.936 ], - "search_algorithm": 31.027, - "validation": 18.539 + "search_algorithm": 47.707, + "validation": 29.484 }, - "timestamp": "2026-03-02 14:27:50 UTC" + "timestamp": "2026-03-05 08:59:0 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, + "x": 8, + "y": 64, "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 4, "z": 1 }, "max_work_group_size": 1024, @@ -20035,12 +20035,12 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "16" }, @@ -20050,49 +20050,49 @@ { "name": "time", "unit": "", - "value": 3385.12 + "value": 3232.704 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.290686126185923 + "value": 19.305513620504563 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2372.0 + "value": 5568.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871688.0 + "value": 1835900.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8945074268046067 + "value": 1.8989198248388937 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 57802.0 + "value": 59126.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099494.0 + "value": 2100258.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5934279485377247 + "value": 1.5877180670560347 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -20104,7 +20104,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07466641194531343 + "value": 0.07440149122676361 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -20134,13 +20134,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.59483671243349 + "value": 98.6901638432784 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96061393102192 + "value": 99.96628493216011 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -20164,13 +20164,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3234856960.0 + "value": 2295332864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -20182,7 +20182,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1294991360.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -20194,13 +20194,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 500957184.0 + "value": 471597056.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.37153052573322 + "value": 42.0322090082976 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -20212,13 +20212,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.99235443856001 + "value": 50.80854814728028 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.03329233847668 + "value": 1.0295677481016268 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -20230,7 +20230,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.16325740578608 + "value": 89.26302357809035 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20249,47 +20249,47 @@ "time" ], "times": { - "compilation": 15998.424, - "data": 64324.166, - "framework": 1840050.808, - "kernel_overhead": 846582.076, - "profiling_overhead": 54017.151, - "profiling_runs": 875127.415, + "compilation_time": 23607.744, + "data": 77471.678, + "framework": 1704561.339, + "kernel_overhead": 767081.195, + "profiling_overhead": 64497.04, + "profiling_runs": 795511.426, "runtimes": [ - 3385.12 + 3232.704 ], - "search_algorithm": 26.279, - "validation": 18.72 + "search_algorithm": 47.313, + "validation": 26.932 }, - "timestamp": "2026-03-02 14:27:51 UTC" + "timestamp": "2026-03-05 08:59:1 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, - "z": 16 + "y": 64, + "z": 8 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -20297,61 +20297,61 @@ { "name": "time", "unit": "", - "value": 3327.68 + "value": 6175.968 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.68314160643388 + "value": 10.190654073865238 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6704.0 + "value": 13804.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839472.0 + "value": 1836884.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9015923543155782 + "value": 1.0295931641592255 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 63061.0 + "value": 113312.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101422.0 + "value": 2102120.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.593475130740971 + "value": 0.41933893379176135 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0746693611157479 + "value": 0.019652898591427114 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -20381,13 +20381,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.69175088677898 + "value": 81.97702198863051 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96050831343801 + "value": 99.9771024059319 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -20399,7 +20399,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -20411,43 +20411,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3234856960.0 + "value": 6215958528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 83886080.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1294991360.0 + "value": 1355284480.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 500957184.0 + "value": 936706048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.373441977810394 + "value": 42.84660159485612 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -20459,13 +20459,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.99442241248047 + "value": 26.838903006557924 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0333342432216501 + "value": 0.32434709443960386 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -20477,7 +20477,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.16708479767712 + "value": 93.6548829783424 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20496,19 +20496,19 @@ "time" ], "times": { - "compilation": 16139.976, - "data": 65555.579, - "framework": 1846845.044, - "kernel_overhead": 848661.875, - "profiling_overhead": 55717.091, - "profiling_runs": 876910.499, + "compilation_time": 23757.858, + "data": 78038.998, + "framework": 3595039.245, + "kernel_overhead": 1704502.447, + "profiling_overhead": 65056.652, + "profiling_runs": 1747441.148, "runtimes": [ - 3327.68 + 6175.968 ], - "search_algorithm": 24.704, - "validation": 17.39 + "search_algorithm": 43.842, + "validation": 26.122 }, - "timestamp": "2026-03-02 14:27:52 UTC" + "timestamp": "2026-03-05 08:59:3 UTC" }, { "compilation_data": { @@ -20516,7 +20516,7 @@ "global_size": { "x": 8, "y": 64, - "z": 16 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -20526,17 +20526,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -20544,61 +20544,61 @@ { "name": "time", "unit": "", - "value": 3351.584 + "value": 5800.224 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.654024901757174 + "value": 10.714771490750815 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5196.0 + "value": 5772.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837244.0 + "value": 1834412.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8980004428231028 + "value": 1.0775054867102731 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 59351.0 + "value": 99952.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100582.0 + "value": 2100454.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5934513758413567 + "value": 0.44089897380706666 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07467210632218972 + "value": 0.02066255018714506 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -20628,13 +20628,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.59558000141246 + "value": 81.97973706333657 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96676276973233 + "value": 99.97153833762317 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -20646,7 +20646,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -20658,43 +20658,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3234856960.0 + "value": 5813305344.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 101187584.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 83886080.0 + "value": 50331648.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1294991360.0 + "value": 986185728.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 500957184.0 + "value": 897892352.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.37256451933955 + "value": 43.947888078140636 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -20706,13 +20706,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.99310661221352 + "value": 28.219300204131258 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0333075802767877 + "value": 0.34102914065051204 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -20724,7 +20724,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.16466113181113 + "value": 94.5034632208357 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20743,19 +20743,19 @@ "time" ], "times": { - "compilation": 16299.225, - "data": 63476.446, - "framework": 1841583.446, - "kernel_overhead": 848101.809, - "profiling_overhead": 53754.198, - "profiling_runs": 876250.993, + "compilation_time": 24311.589, + "data": 78860.412, + "framework": 3491183.562, + "kernel_overhead": 1652790.248, + "profiling_overhead": 65500.38, + "profiling_runs": 1694032.522, "runtimes": [ - 3351.584 + 5800.224 ], - "search_algorithm": 27.131, - "validation": 20.413 + "search_algorithm": 36.929, + "validation": 32.185 }, - "timestamp": "2026-03-02 14:27:53 UTC" + "timestamp": "2026-03-05 08:59:5 UTC" }, { "compilation_data": { @@ -20763,7 +20763,7 @@ "global_size": { "x": 8, "y": 32, - "z": 16 + "z": 256 }, "local_memory_size": 0, "local_size": { @@ -20773,17 +20773,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 44 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "1" }, "correctness": 1, "invalidity": "correct", @@ -20791,61 +20791,61 @@ { "name": "time", "unit": "", - "value": 3288.32 + "value": 3652.096 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.67740098097442 + "value": 17.093173019654557 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 212.0 + "value": 188.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1834496.0 + "value": 1836576.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8888999233601558 + "value": 1.6843900360062083 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 53061.0 + "value": 59606.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098984.0 + "value": 2098980.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5935080642912398 + "value": 22.551583748153735 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07466766871715136 + "value": 1.0567276250792648 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -20875,13 +20875,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.35688676425403 + "value": 72.73519530002119 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95648449631103 + "value": 99.95301640059463 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -20893,7 +20893,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -20905,43 +20905,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3234856960.0 + "value": 587202560.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 117440512.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 83886080.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1294991360.0 + "value": 234881024.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 50331648.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 500957184.0 + "value": 341311488.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.37425945303741 + "value": 40.08644467134761 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -20953,13 +20953,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.9953193809951 + "value": 45.108238810937046 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.033352419097313 + "value": 11.981875934155154 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -20971,7 +20971,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.16883321224304 + "value": 57.355090308352175 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20990,47 +20990,47 @@ "time" ], "times": { - "compilation": 16192.956, - "data": 65112.363, - "framework": 1845724.784, - "kernel_overhead": 848646.651, - "profiling_overhead": 55420.697, - "profiling_runs": 876545.073, + "compilation_time": 23542.513, + "data": 75305.716, + "framework": 280767.43, + "kernel_overhead": 56108.562, + "profiling_overhead": 62703.69, + "profiling_runs": 86649.462, "runtimes": [ - 3288.32 + 3652.096 ], - "search_algorithm": 26.489, - "validation": 20.057 + "search_algorithm": 34.442, + "validation": 30.438 }, - "timestamp": "2026-03-02 14:27:54 UTC" + "timestamp": "2026-03-05 08:59:5 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 8 + "x": 8, + "y": 32, + "z": 128 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 56 + "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -21038,61 +21038,61 @@ { "name": "time", "unit": "", - "value": 5388.064 + "value": 2145.696 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.082009454432734 + "value": 29.848731884057973 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8956.0 + "value": 6644.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1866152.0 + "value": 1838724.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.160726919686435 + "value": 2.9231616774994507 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 97209.0 + "value": 40668.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100912.0 + "value": 2100390.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.47672694971304846 + "value": 19.82712463689837 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.022342309712631857 + "value": 0.9290833892603654 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -21122,13 +21122,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.90037931878727 + "value": 90.14398704992769 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97638788962449 + "value": 99.91212727952865 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -21140,7 +21140,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -21152,43 +21152,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4706533376.0 + "value": 704643072.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 142606336.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1121452032.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 852869120.0 + "value": 256901120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.991188755223 + "value": 50.290458375920636 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -21200,13 +21200,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.51190469892855 + "value": 79.35151088156461 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3687351764152742 + "value": 10.693855958648355 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -21218,7 +21218,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.94244511462298 + "value": 75.9429847673884 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -21237,47 +21237,47 @@ "time" ], "times": { - "compilation": 64159.091, - "data": 64952.944, - "framework": 3081225.75, - "kernel_overhead": 1461266.823, - "profiling_overhead": 55263.878, - "profiling_runs": 1499742.105, + "compilation_time": 24249.836, + "data": 78457.512, + "framework": 295900.991, + "kernel_overhead": 64400.751, + "profiling_overhead": 65746.26, + "profiling_runs": 87296.468, "runtimes": [ - 5388.064 + 2145.696 ], - "search_algorithm": 24.34, - "validation": 18.868 + "search_algorithm": 41.357, + "validation": 24.348 }, - "timestamp": "2026-03-02 14:27:56 UTC" + "timestamp": "2026-03-05 08:59:5 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 32, - "z": 8 + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 56 + "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -21285,61 +21285,61 @@ { "name": "time", "unit": "", - "value": 5482.912 + "value": 1840.8 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.032994877199862 + "value": 34.38343948664021 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16184.0 + "value": 508.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870680.0 + "value": 1840992.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1624590262703274 + "value": 3.3561161240681017 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 106743.0 + "value": 30478.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106197.0 + "value": 2099167.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4767120449546747 + "value": 11.3842722234391 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02234145665642865 + "value": 0.5331299965345249 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -21369,13 +21369,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.8646226800163 + "value": 94.83346418710724 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97513244785675 + "value": 99.8585236045497 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -21387,7 +21387,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -21399,43 +21399,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4706533376.0 + "value": 390070272.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 71303168.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1121452032.0 + "value": 88080384.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 852869120.0 + "value": 197394432.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.99005497644099 + "value": 46.22532471978186 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -21447,13 +21447,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.51112285763003 + "value": 91.11642764605563 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.36872572789372227 + "value": 6.317642932490186 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -21465,7 +21465,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.93996104789827 + "value": 67.00403306654975 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -21484,47 +21484,47 @@ "time" ], "times": { - "compilation": 15152.205, - "data": 66073.72, - "framework": 3090489.722, - "kernel_overhead": 1464608.749, - "profiling_overhead": 56390.06, - "profiling_runs": 1503417.193, + "compilation_time": 23738.494, + "data": 78891.017, + "framework": 240149.04000000004, + "kernel_overhead": 37616.868, + "profiling_overhead": 64694.833, + "profiling_runs": 58946.322, "runtimes": [ - 5482.912 + 1840.8 ], - "search_algorithm": 28.806, - "validation": 18.404 + "search_algorithm": 36.819, + "validation": 27.039 }, - "timestamp": "2026-03-02 14:27:57 UTC" + "timestamp": "2026-03-05 08:59:5 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, - "z": 8 + "y": 32, + "z": 32 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 56 + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -21532,61 +21532,61 @@ { "name": "time", "unit": "", - "value": 5449.632 + "value": 1802.912 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.83621420294143 + "value": 35.418826520537124 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 820.0 + "value": 172.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1834104.0 + "value": 1836484.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1567665125316449 + "value": 3.4628360785498176 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 90260.0 + "value": 28648.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099167.0 + "value": 2098982.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4767141688854015 + "value": 5.876784090846199 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02234260626647558 + "value": 0.2753039085851221 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -21616,13 +21616,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.8986101026343 + "value": 97.31117234185692 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97699396124155 + "value": 99.90957768810586 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -21634,7 +21634,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -21646,43 +21646,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4706533376.0 + "value": 362807296.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 69206016.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1121452032.0 + "value": 127926272.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 852869120.0 + "value": 173801472.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.99147512222771 + "value": 43.21470186518152 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -21694,13 +21694,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.51212472040053 + "value": 94.05544790751537 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.36873783536616855 + "value": 3.4444133755193618 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -21712,7 +21712,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.94314416699883 + "value": 60.8984758456798 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -21731,47 +21731,47 @@ "time" ], "times": { - "compilation": 17309.392, - "data": 61102.815, - "framework": 3078264.67, - "kernel_overhead": 1463694.951, - "profiling_overhead": 51257.143, - "profiling_runs": 1502209.761, + "compilation_time": 23311.967, + "data": 77615.47, + "framework": 227978.95, + "kernel_overhead": 32220.56, + "profiling_overhead": 64676.633, + "profiling_runs": 53466.287, "runtimes": [ - 5449.632 + 1802.912 ], - "search_algorithm": 25.679, - "validation": 18.053 + "search_algorithm": 36.194, + "validation": 27.27 }, - "timestamp": "2026-03-02 14:27:59 UTC" + "timestamp": "2026-03-05 08:59:5 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, - "z": 8 + "y": 32, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 56 + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -21779,61 +21779,61 @@ { "name": "time", "unit": "", - "value": 5447.232 + "value": 1756.224 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.854275817009578 + "value": 36.20256978094085 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4500.0 + "value": 868.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1834220.0 + "value": 1835280.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1581313212291875 + "value": 3.5432683322657783 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 91995.0 + "value": 29178.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100070.0 + "value": 2099900.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4767037878119691 + "value": 3.0041930022446444 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.022342061913402694 + "value": 0.1407434870916987 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -21863,13 +21863,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.86589391521635 + "value": 98.08951524677886 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97807964723252 + "value": 99.92421242977572 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -21881,7 +21881,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -21893,43 +21893,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4706533376.0 + "value": 209715200.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 67108864.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1121452032.0 + "value": 160432128.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 852869120.0 + "value": 163381248.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.989980957015746 + "value": 39.648535653279666 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -21941,13 +21941,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.511049995557897 + "value": 96.1537597861542 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.36872484735842675 + "value": 1.9484282378541988 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -21959,7 +21959,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.93972955047134 + "value": 58.524564640668764 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -21978,19 +21978,19 @@ "time" ], "times": { - "compilation": 15558.579, - "data": 65500.37, - "framework": 3083473.1850000005, - "kernel_overhead": 1461866.019, - "profiling_overhead": 55833.461, - "profiling_runs": 1500273.335, + "compilation_time": 23355.022, + "data": 77540.835, + "framework": 234802.39300000004, + "kernel_overhead": 35888.629, + "profiling_overhead": 64291.736, + "profiling_runs": 57081.193, "runtimes": [ - 5447.232 + 1756.224 ], - "search_algorithm": 37.465, - "validation": 18.494 + "search_algorithm": 33.882, + "validation": 26.702 }, - "timestamp": "2026-03-02 14:28:0 UTC" + "timestamp": "2026-03-05 08:59:6 UTC" }, { "compilation_data": { @@ -22008,10 +22008,10 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 56 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", @@ -22026,49 +22026,49 @@ { "name": "time", "unit": "", - "value": 5447.36 + "value": 1752.448 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.85477705336656 + "value": 36.02496919324011 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7280.0 + "value": 2952.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1830252.0 + "value": 1830644.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1611545044615277 + "value": 3.541810176239485 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 95418.0 + "value": 32115.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100808.0 + "value": 2103028.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.47707855122532533 + "value": 1.4975219985175534 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -22080,7 +22080,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.022357935407691713 + "value": 0.07013580135860718 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -22110,13 +22110,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.75410187826006 + "value": 81.50095599888039 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9667323819178 + "value": 99.88440620523762 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -22128,7 +22128,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -22140,13 +22140,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4706533376.0 + "value": 138412032.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 33554432.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -22158,7 +22158,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1121452032.0 + "value": 82837504.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -22170,13 +22170,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 852869120.0 + "value": 150192128.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.02540643810495 + "value": 38.03346090230104 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -22188,13 +22188,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.536193140745993 + "value": 95.86956672515153 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3690287012858708 + "value": 1.158579968968506 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -22206,7 +22206,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.01957978720422 + "value": 53.64109199470638 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -22225,47 +22225,47 @@ "time" ], "times": { - "compilation": 16034.829, - "data": 62159.137, - "framework": 3085063.4860000005, - "kernel_overhead": 1465950.479, - "profiling_overhead": 52353.403, - "profiling_runs": 1504600.467, + "compilation_time": 23522.335, + "data": 78010.587, + "framework": 214941.74300000002, + "kernel_overhead": 25393.347, + "profiling_overhead": 64737.256, + "profiling_runs": 46800.553, "runtimes": [ - 5447.36 + 1752.448 ], - "search_algorithm": 26.049, - "validation": 14.717 + "search_algorithm": 36.308, + "validation": 27.219 }, - "timestamp": "2026-03-02 14:28:2 UTC" + "timestamp": "2026-03-05 08:59:6 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 16 + "x": 8, + "y": 32, + "z": 128 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 19 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -22273,61 +22273,61 @@ { "name": "time", "unit": "", - "value": 3203.008 + "value": 5212.32 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.169488465189222 + "value": 12.077077097998561 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9020.0 + "value": 10092.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872948.0 + "value": 1838980.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9110962585700848 + "value": 1.2097525102507767 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 64810.0 + "value": 93613.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102807.0 + "value": 2100774.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5875502207102619 + "value": 7.958858416767719 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07437946894221355 + "value": 0.37298925519732545 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -22357,13 +22357,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.7338885235761 + "value": 96.64592044196938 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9458710875116 + "value": 99.96989955956049 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -22375,7 +22375,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -22387,43 +22387,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2295332864.0 + "value": 8212447232.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 83886080.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 3321888768.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 471597056.0 + "value": 715390976.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.028436776209475 + "value": 36.28375207420477 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -22435,13 +22435,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.803883720977815 + "value": 31.837999821678565 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0294732296975484 + "value": 4.2906679447184 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -22453,7 +22453,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.25481809060022 + "value": 84.85002557366224 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -22472,47 +22472,47 @@ "time" ], "times": { - "compilation": 75464.487, - "data": 60041.715, - "framework": 1666980.987, - "kernel_overhead": 764381.548, - "profiling_overhead": 50320.342, - "profiling_runs": 792237.382, + "compilation_time": 23618.779, + "data": 78619.611, + "framework": 2143222.122, + "kernel_overhead": 980486.144, + "profiling_overhead": 65341.179, + "profiling_runs": 1018775.188, "runtimes": [ - 3203.008 + 5212.32 ], - "search_algorithm": 27.797, - "validation": 18.135 + "search_algorithm": 44.546, + "validation": 27.933 }, - "timestamp": "2026-03-02 14:28:3 UTC" + "timestamp": "2026-03-05 08:59:7 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 32, - "z": 16 + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -22520,61 +22520,61 @@ { "name": "time", "unit": "", - "value": 3377.12 + "value": 5763.552 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.785001462017785 + "value": 10.988757674333364 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6520.0 + "value": 2324.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870404.0 + "value": 1839652.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8798335371609292 + "value": 1.1071385220582493 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 59602.0 + "value": 97932.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100189.0 + "value": 2103766.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5874844139556488 + "value": 3.621197714728821 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07439232945340526 + "value": 0.1696422103173402 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -22604,13 +22604,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.68824397372421 + "value": 98.52189164443634 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96836165454401 + "value": 99.93498679381365 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -22622,7 +22622,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -22634,43 +22634,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2295332864.0 + "value": 10049552384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 83886080.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 471597056.0 + "value": 786956288.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.02633914578213 + "value": 33.79543895989285 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -22682,13 +22682,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.801236244143496 + "value": 28.971105605447793 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0294195820956813 + "value": 2.0087387675652284 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -22700,7 +22700,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.25020244902174 + "value": 84.93327551332058 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -22719,47 +22719,47 @@ "time" ], "times": { - "compilation": 16453.457, - "data": 65358.592, - "framework": 1683321.605, - "kernel_overhead": 767100.047, - "profiling_overhead": 55410.052, - "profiling_runs": 795452.914, + "compilation_time": 23314.469, + "data": 84564.626, + "framework": 2741347.61, + "kernel_overhead": 1272550.615, + "profiling_overhead": 70901.178, + "profiling_runs": 1313331.191, "runtimes": [ - 3377.12 + 5763.552 ], - "search_algorithm": 25.974, - "validation": 17.577 + "search_algorithm": 43.733, + "validation": 32.403 }, - "timestamp": "2026-03-02 14:28:4 UTC" + "timestamp": "2026-03-05 08:59:8 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, - "z": 16 + "y": 32, + "z": 32 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 38 + "private_memory_size": 32, + "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -22767,61 +22767,61 @@ { "name": "time", "unit": "", - "value": 3360.128 + "value": 7988.736 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.659424001460536 + "value": 7.981624849607649 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 11280.0 + "value": 7120.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838744.0 + "value": 1911964.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.900716269863505 + "value": 49.16923766223257 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 69607.0 + "value": 701509.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101102.0 + "value": 138415583.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5875443199915216 + "value": 1.2788183418797983 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07439477057071711 + "value": 0.060075637221961346 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -22839,25 +22839,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.72629969824082 + "value": 96.40039340703119 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96751226290753 + "value": 100.10358630543877 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -22869,7 +22869,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -22881,43 +22881,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2295332864.0 + "value": 3982491648.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 83886080.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 155189248.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 471597056.0 + "value": 395247616.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.02814664815189 + "value": 10.711895970282031 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -22929,13 +22929,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.80333489681836 + "value": 20.48459826656783 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0294621085048643 + "value": 11.092489979308457 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -22947,7 +22947,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.25385559202846 + "value": 30.162099901977125 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -22966,45 +22966,45 @@ "time" ], "times": { - "compilation": 17358.989, - "data": 64591.7, - "framework": 1678051.1979999999, - "kernel_overhead": 765160.363, - "profiling_overhead": 54832.109, - "profiling_runs": 793467.026, + "compilation_time": 23038.686, + "data": 77706.384, + "framework": 945479.5329999999, + "kernel_overhead": 375227.556, + "profiling_overhead": 64479.427, + "profiling_runs": 428066.166, "runtimes": [ - 3360.128 + 7988.736 ], - "search_algorithm": 24.154, - "validation": 20.389 + "search_algorithm": 48.681, + "validation": 31.207 }, - "timestamp": "2026-03-02 14:28:5 UTC" + "timestamp": "2026-03-05 08:59:9 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, + "y": 32, "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 38 + "private_memory_size": 64, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "16" }, @@ -23014,49 +23014,49 @@ { "name": "time", "unit": "", - "value": 3319.872 + "value": 8049.568 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.693710720698661 + "value": 8.299550297587416 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5664.0 + "value": 856.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837816.0 + "value": 1998260.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8900411431436674 + "value": 51.79046508667577 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 59762.0 + "value": 8378613.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101478.0 + "value": 138414780.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.587526558011264 + "value": 0.6468986545470891 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -23068,7 +23068,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07439480660316873 + "value": 0.02998399048164469 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -23086,25 +23086,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.68385884476073 + "value": 85.22455523897554 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96916808861167 + "value": 99.90366075291342 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -23128,25 +23128,25 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2295332864.0 + "value": 3635412992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 83886080.0 + "value": 2252341248.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 77594624.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -23158,13 +23158,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 471597056.0 + "value": 367034368.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.02746008880738 + "value": 9.259012466563266 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -23176,13 +23176,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.80251802843808 + "value": 20.488809600375443 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0294455557520412 + "value": 10.759626330665913 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -23194,7 +23194,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.25245435303523 + "value": 28.014880678049842 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -23213,19 +23213,19 @@ "time" ], "times": { - "compilation": 17994.515, - "data": 66633.242, - "framework": 1684941.884, - "kernel_overhead": 766842.209, - "profiling_overhead": 56452.674, - "profiling_runs": 795013.759, + "compilation_time": 24411.875, + "data": 78500.195, + "framework": 918128.9310000001, + "kernel_overhead": 360829.543, + "profiling_overhead": 65240.154, + "profiling_runs": 413559.039, "runtimes": [ - 3319.872 + 8049.568 ], - "search_algorithm": 33.808, - "validation": 20.738 + "search_algorithm": 55.91, + "validation": 31.469 }, - "timestamp": "2026-03-02 14:28:6 UTC" + "timestamp": "2026-03-05 08:59:9 UTC" }, { "compilation_data": { @@ -23233,7 +23233,7 @@ "global_size": { "x": 8, "y": 32, - "z": 16 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -23242,18 +23242,18 @@ "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 38 + "private_memory_size": 128, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -23261,61 +23261,61 @@ { "name": "time", "unit": "", - "value": 3318.08 + "value": 8524.96 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.815897435897435 + "value": 8.594076310712438 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8176.0 + "value": 6720.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840848.0 + "value": 2205596.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9025900451083488 + "value": 61.277786915256804 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 62722.0 + "value": 47211445.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101382.0 + "value": 138415967.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.596029895118679 + "value": 0.30079414426282525 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 2097152.0 + "value": 1048576.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07478838411260522 + "value": 0.014027945887472722 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -23333,25 +23333,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.52806345449801 + "value": 75.82181887423913 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95470636855835 + "value": 99.30158362985749 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -23363,7 +23363,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -23375,43 +23375,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2295332864.0 + "value": 3461873664.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 83886080.0 + "value": 2218786816.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 38797312.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 471597056.0 + "value": 352927744.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.255780963791054 + "value": 8.03136331249332 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -23423,13 +23423,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 51.07867227310315 + "value": 19.287529380955394 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0350414547528228 + "value": 9.971031119182875 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -23441,7 +23441,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.73765883594822 + "value": 25.358748173548225 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -23460,47 +23460,47 @@ "time" ], "times": { - "compilation": 17384.471, - "data": 62550.66, - "framework": 1671774.809, - "kernel_overhead": 764197.902, - "profiling_overhead": 52661.114, - "profiling_runs": 792365.133, + "compilation_time": 23578.843, + "data": 78144.283, + "framework": 898934.0789999999, + "kernel_overhead": 350134.852, + "profiling_overhead": 65090.84, + "profiling_runs": 405564.104, "runtimes": [ - 3318.08 + 8524.96 ], - "search_algorithm": 27.633, - "validation": 18.324 + "search_algorithm": 46.635, + "validation": 28.326 }, - "timestamp": "2026-03-02 14:28:7 UTC" + "timestamp": "2026-03-05 08:59:10 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 8 + "x": 8, + "y": 32, + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 23 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -23508,61 +23508,61 @@ { "name": "time", "unit": "", - "value": 6073.984 + "value": 3445.024 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.041682399229515 + "value": 18.378487602464233 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8656.0 + "value": 9220.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872372.0 + "value": 1838956.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0355783801930365 + "value": 1.8180915528614652 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 112644.0 + "value": 65594.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108633.0 + "value": 2100634.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4193345887558227 + "value": 6.060092967716212 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019651220894467667 + "value": 0.2839351357946128 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -23592,13 +23592,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.00619466328523 + "value": 97.71803381668222 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9707784442227 + "value": 99.93649791294504 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -23610,7 +23610,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -23622,43 +23622,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6215958528.0 + "value": 4945084416.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1355284480.0 + "value": 1665138688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 936706048.0 + "value": 484966400.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.84554831921402 + "value": 41.385593063683345 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -23670,13 +23670,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.838309499493267 + "value": 48.48905473736537 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.32433992192991135 + "value": 3.362034068704044 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -23688,7 +23688,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.6528119236993 + "value": 87.6029746244573 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -23707,47 +23707,47 @@ "time" ], "times": { - "compilation": 86913.895, - "data": 60811.894, - "framework": 3576535.886, - "kernel_overhead": 1711699.383, - "profiling_overhead": 50302.698, - "profiling_runs": 1753721.911, + "compilation_time": 22903.166, + "data": 77845.534, + "framework": 1395668.1, + "kernel_overhead": 611881.111, + "profiling_overhead": 64611.143, + "profiling_runs": 641330.312, "runtimes": [ - 6073.984 + 3445.024 ], - "search_algorithm": 34.206, - "validation": 16.313 + "search_algorithm": 45.225, + "validation": 29.896 }, - "timestamp": "2026-03-02 14:28:8 UTC" + "timestamp": "2026-03-05 08:59:11 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 32, - "z": 8 + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -23755,61 +23755,61 @@ { "name": "time", "unit": "", - "value": 6226.496 + "value": 4075.296 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.933963395571462 + "value": 15.402028069913992 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 14028.0 + "value": 8840.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870056.0 + "value": 1843228.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0239426465834878 + "value": 1.5315372854454459 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 114453.0 + "value": 76959.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101756.0 + "value": 2109265.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.41932558369979606 + "value": 2.5332294820762513 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01965205122122913 + "value": 0.1186618271671367 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -23839,13 +23839,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.97948506332897 + "value": 98.12642663888748 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97705861313837 + "value": 99.9106335054918 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -23857,7 +23857,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -23869,43 +23869,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6215958528.0 + "value": 5463080960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1355284480.0 + "value": 1369440256.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 936706048.0 + "value": 596246528.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.84463688285275 + "value": 40.866962747809325 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -23917,13 +23917,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.83775755450373 + "value": 40.539465705776266 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.32433325169627314 + "value": 1.4845995741861426 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -23935,7 +23935,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.65088590073955 + "value": 90.04646255708984 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -23954,47 +23954,47 @@ "time" ], "times": { - "compilation": 15752.352, - "data": 61644.977, - "framework": 3581174.9809999997, - "kernel_overhead": 1712686.358, - "profiling_overhead": 51568.555, - "profiling_runs": 1755275.091, + "compilation_time": 23562.709, + "data": 78056.506, + "framework": 1978049.5779999997, + "kernel_overhead": 901502.668, + "profiling_overhead": 64437.894, + "profiling_runs": 934052.51, "runtimes": [ - 6226.496 + 4075.296 ], - "search_algorithm": 24.984, - "validation": 16.516 + "search_algorithm": 57.267, + "validation": 25.757 }, - "timestamp": "2026-03-02 14:28:10 UTC" + "timestamp": "2026-03-05 08:59:12 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, - "z": 8 + "y": 32, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 48 + "private_memory_size": 64, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -24002,61 +24002,61 @@ { "name": "time", "unit": "", - "value": 6361.248 + "value": 8054.656 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.779906156682226 + "value": 8.337191522746194 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10036.0 + "value": 1828.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837444.0 + "value": 1997732.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0231079922717738 + "value": 53.21900155539085 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 111509.0 + "value": 12835159.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102889.0 + "value": 138414846.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4193307938405543 + "value": 0.6426133288500948 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019652322145445034 + "value": 0.029762784936339538 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -24074,25 +24074,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.00532514142054 + "value": 86.91331211123594 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97792432327034 + "value": 99.24235457259728 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -24104,7 +24104,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -24116,43 +24116,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6215958528.0 + "value": 1958739968.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 1178599424.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1355284480.0 + "value": 143654912.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 936706048.0 + "value": 266371072.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.84498441698085 + "value": 9.092342218022935 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -24164,13 +24164,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.837895149555212 + "value": 20.47317525940482 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.32433491452709545 + "value": 5.633122196618465 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -24182,7 +24182,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.65136604139364 + "value": 20.3160555466965 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -24201,45 +24201,45 @@ "time" ], "times": { - "compilation": 16184.805, - "data": 65027.935, - "framework": 3586406.308, - "kernel_overhead": 1711798.048, - "profiling_overhead": 55362.856, - "profiling_runs": 1754217.469, + "compilation_time": 23052.975, + "data": 79186.412, + "framework": 591723.954, + "kernel_overhead": 196805.399, + "profiling_overhead": 66079.039, + "profiling_runs": 249653.104, "runtimes": [ - 6361.248 + 8054.656 ], - "search_algorithm": 25.541, - "validation": 20.867 + "search_algorithm": 48.377, + "validation": 36.974 }, - "timestamp": "2026-03-02 14:28:12 UTC" + "timestamp": "2026-03-05 08:59:12 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, + "y": 32, "z": 8 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 48 + "private_memory_size": 128, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "32" }, @@ -24249,49 +24249,49 @@ { "name": "time", "unit": "", - "value": 6228.736 + "value": 9240.864 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.7644951166400755 + "value": 7.919286503248944 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6508.0 + "value": 2196.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837372.0 + "value": 2191160.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0202195732973114 + "value": 79.1084683769445 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 106725.0 + "value": 118713505.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101487.0 + "value": 138416013.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4193147982646368 + "value": 0.28142749271095907 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -24303,7 +24303,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019651652077351384 + "value": 0.013038542304552167 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -24321,25 +24321,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.97554863038033 + "value": 94.34121754177049 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97519754564172 + "value": 99.21017959626442 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -24363,25 +24363,25 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6215958528.0 + "value": 1817706496.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 1145044992.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1355284480.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -24393,13 +24393,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 936706048.0 + "value": 252264448.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.844807981157984 + "value": 7.400835631716535 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -24411,13 +24411,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.837712047558494 + "value": 17.943679266509655 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.32433270174661755 + "value": 4.790384101056715 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -24429,7 +24429,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.6507271033539 + "value": 16.86303156629034 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -24448,19 +24448,19 @@ "time" ], "times": { - "compilation": 15314.686, - "data": 60989.447, - "framework": 3582716.3549999995, - "kernel_overhead": 1714069.974, - "profiling_overhead": 51310.533, - "profiling_runs": 1756346.401, + "compilation_time": 23060.91, + "data": 77544.198, + "framework": 574807.198, + "kernel_overhead": 186936.679, + "profiling_overhead": 64102.402, + "profiling_runs": 246223.919, "runtimes": [ - 6228.736 + 9240.864 ], - "search_algorithm": 24.026, - "validation": 16.195 + "search_algorithm": 44.824, + "validation": 30.024 }, - "timestamp": "2026-03-02 14:28:14 UTC" + "timestamp": "2026-03-05 08:59:13 UTC" }, { "compilation_data": { @@ -24468,7 +24468,7 @@ "global_size": { "x": 8, "y": 32, - "z": 8 + "z": 32 }, "local_memory_size": 0, "local_size": { @@ -24478,17 +24478,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -24496,61 +24496,61 @@ { "name": "time", "unit": "", - "value": 6185.28 + "value": 2551.072 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.767700052130848 + "value": 24.67507710760795 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 440.0 + "value": 6116.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1830520.0 + "value": 1837132.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.023956839868056 + "value": 2.426071821444852 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 100706.0 + "value": 46952.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099184.0 + "value": 2100326.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.42001284357053886 + "value": 4.078931781659618 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01968335807315302 + "value": 0.1911141263014284 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -24580,13 +24580,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.95574964112167 + "value": 98.30925077305018 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96870533481155 + "value": 99.94343067353732 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -24598,7 +24598,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -24610,43 +24610,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 6215958528.0 + "value": 2776629248.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1355284480.0 + "value": 1642070016.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 936706048.0 + "value": 378208256.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.91654511770431 + "value": 45.383314376981495 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -24658,13 +24658,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.882757758245457 + "value": 65.2705448851743 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3248770774006714 + "value": 2.3902787433535506 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -24676,7 +24676,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.8079292226496 + "value": 91.96287300709014 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -24695,47 +24695,47 @@ "time" ], "times": { - "compilation": 15904.724, - "data": 61010.68, - "framework": 3582105.234, - "kernel_overhead": 1713822.314, - "profiling_overhead": 51311.063, - "profiling_runs": 1755961.177, + "compilation_time": 23930.093, + "data": 76904.437, + "framework": 1115730.438, + "kernel_overhead": 475397.66, + "profiling_overhead": 63097.784, + "profiling_runs": 500330.557, "runtimes": [ - 6185.28 + 2551.072 ], - "search_algorithm": 25.589, - "validation": 17.55 + "search_algorithm": 46.746, + "validation": 27.78 }, - "timestamp": "2026-03-02 14:28:16 UTC" + "timestamp": "2026-03-05 08:59:13 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 8 + "x": 8, + "y": 32, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -24743,61 +24743,61 @@ { "name": "time", "unit": "", - "value": 5805.728 + "value": 3285.824 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.425012899157256 + "value": 19.242696392221983 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10032.0 + "value": 5100.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871024.0 + "value": 1834120.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.07813930981866 + "value": 1.9051201904603687 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 104952.0 + "value": 57355.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102872.0 + "value": 2100134.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4408702389081566 + "value": 1.5936535451639076 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.020660415702763335 + "value": 0.0746825308699038 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -24827,13 +24827,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.01765084994538 + "value": 98.32449121415682 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9655530484588 + "value": 99.96281124634557 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -24845,7 +24845,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -24857,43 +24857,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5813305344.0 + "value": 3234856960.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 101187584.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 986185728.0 + "value": 1294991360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 897892352.0 + "value": 500957184.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.94614183416958 + "value": 45.38005377589994 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -24905,13 +24905,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.21807450595713 + "value": 51.002241505139246 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.34101432813595656 + "value": 1.0334926867496477 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -24923,7 +24923,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.49935943287379 + "value": 95.18175146603859 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -24942,44 +24942,44 @@ "time" ], "times": { - "compilation": 73779.137, - "data": 64356.326, - "framework": 3490051.291, - "kernel_overhead": 1665283.676, - "profiling_overhead": 54349.525, - "profiling_runs": 1706061.764, + "compilation_time": 23717.074, + "data": 76407.324, + "framework": 1868483.835, + "kernel_overhead": 850240.442, + "profiling_overhead": 63186.081, + "profiling_runs": 878649.988, "runtimes": [ - 5805.728 + 3285.824 ], - "search_algorithm": 22.747, - "validation": 18.805 + "search_algorithm": 44.552, + "validation": 26.84 }, - "timestamp": "2026-03-02 14:28:18 UTC" + "timestamp": "2026-03-05 08:59:14 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 32, "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "32" @@ -24990,49 +24990,49 @@ { "name": "time", "unit": "", - "value": 5896.256 + "value": 5451.52 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.334792862653446 + "value": 11.572388954133753 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9612.0 + "value": 10112.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871080.0 + "value": 1831368.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0815447367043287 + "value": 1.1681980496801165 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 107987.0 + "value": 100582.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109131.0 + "value": 2108924.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4408711471858047 + "value": 0.47711521982472127 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -25044,7 +25044,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02066219718858926 + "value": 0.022359654681627303 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -25074,13 +25074,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.97649257552028 + "value": 65.73993089953713 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97360884796042 + "value": 99.96950885875762 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -25104,13 +25104,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5813305344.0 + "value": 4706533376.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 101187584.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -25122,7 +25122,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 986185728.0 + "value": 1121452032.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -25134,13 +25134,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 897892352.0 + "value": 852869120.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.946291752640626 + "value": 43.02746772980107 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -25152,13 +25152,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.218233678440164 + "value": 30.53769314979895 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3410162517291963 + "value": 0.3690468288366816 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -25170,7 +25170,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.49989006696893 + "value": 97.02434561532576 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -25189,47 +25189,47 @@ "time" ], "times": { - "compilation": 15551.976, - "data": 61681.883, - "framework": 3489265.607, - "kernel_overhead": 1667470.908, - "profiling_overhead": 51777.778, - "profiling_runs": 1708335.038, + "compilation_time": 23602.028, + "data": 79279.707, + "framework": 3091964.3839999996, + "kernel_overhead": 1453865.355, + "profiling_overhead": 65996.476, + "profiling_runs": 1492822.846, "runtimes": [ - 5896.256 + 5451.52 ], - "search_algorithm": 26.216, - "validation": 24.103 + "search_algorithm": 46.017, + "validation": 25.615 }, - "timestamp": "2026-03-02 14:28:20 UTC" + "timestamp": "2026-03-05 08:59:16 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, - "z": 8 + "y": 32, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -25237,61 +25237,61 @@ { "name": "time", "unit": "", - "value": 5891.712 + "value": 3250.912 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.20072907403607 + "value": 19.325615979934547 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6588.0 + "value": 7568.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1834868.0 + "value": 1836708.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0765140230514716 + "value": 1.9105284131095486 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 102742.0 + "value": 59956.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100462.0 + "value": 2100317.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4408777573354544 + "value": 1.5962124904598585 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 1048576.0 + "value": 2097152.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02066237299148047 + "value": 0.0747984177164143 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -25321,13 +25321,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.01044763138049 + "value": 98.53428792683535 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9753148287838 + "value": 99.95608948478998 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -25339,7 +25339,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -25351,43 +25351,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5813305344.0 + "value": 2295332864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 101187584.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 83886080.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 986185728.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 897892352.0 + "value": 471597056.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.94589399996889 + "value": 42.2606766213056 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -25399,13 +25399,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.21799225073554 + "value": 51.08481810122784 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3410133340848167 + "value": 1.0351659917973413 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -25417,7 +25417,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.49908186639601 + "value": 89.74845614533342 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -25436,32 +25436,32 @@ "time" ], "times": { - "compilation": 15628.28, - "data": 60607.116, - "framework": 3489120.392, - "kernel_overhead": 1668402.632, - "profiling_overhead": 50940.681, - "profiling_runs": 1709169.963, + "compilation_time": 23873.486, + "data": 76100.779, + "framework": 1692229.267, + "kernel_overhead": 762357.011, + "profiling_overhead": 62902.878, + "profiling_runs": 790868.599, "runtimes": [ - 5891.712 + 3250.912 ], - "search_algorithm": 24.941, - "validation": 17.959 + "search_algorithm": 44.932, + "validation": 25.656 }, - "timestamp": "2026-03-02 14:28:21 UTC" + "timestamp": "2026-03-05 08:59:17 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, + "y": 32, "z": 8 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, @@ -25469,12 +25469,12 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "0", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "32" }, @@ -25484,49 +25484,49 @@ { "name": "time", "unit": "", - "value": 5940.928 + "value": 6145.376 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.186546045165684 + "value": 10.123611052282321 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8192.0 + "value": 6812.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838588.0 + "value": 1828772.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0813241380591092 + "value": 1.0272348554438024 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 107362.0 + "value": 106246.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109209.0 + "value": 2100518.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.44087576277698953 + "value": 0.4200296378029494 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -25538,7 +25538,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.020661806680617196 + "value": 0.019684418782933914 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -25568,13 +25568,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.97973547106659 + "value": 81.96010490876205 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97581405887304 + "value": 99.9700862072777 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -25598,13 +25598,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5813305344.0 + "value": 6215958528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 101187584.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -25616,7 +25616,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 986185728.0 + "value": 1355284480.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -25628,13 +25628,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 897892352.0 + "value": 936706048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.94469589097078 + "value": 42.91808833600284 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -25646,13 +25646,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.21707795379767 + "value": 26.88383508633597 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.34100228484203726 + "value": 0.3248900968685622 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -25664,7 +25664,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.49602145395343 + "value": 93.81168858090346 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -25683,19 +25683,19 @@ "time" ], "times": { - "compilation": 15978.381, - "data": 66397.71, - "framework": 3497656.438, - "kernel_overhead": 1666869.345, - "profiling_overhead": 56462.576, - "profiling_runs": 1707926.807, + "compilation_time": 23605.187, + "data": 77194.739, + "framework": 3585690.7630000003, + "kernel_overhead": 1700806.226, + "profiling_overhead": 64119.629, + "profiling_runs": 1743570.169, "runtimes": [ - 5940.928 + 6145.376 ], - "search_algorithm": 28.082, - "validation": 21.911 + "search_algorithm": 45.985, + "validation": 32.586 }, - "timestamp": "2026-03-02 14:28:23 UTC" + "timestamp": "2026-03-05 08:59:19 UTC" }, { "compilation_data": { @@ -25731,49 +25731,49 @@ { "name": "time", "unit": "", - "value": 5914.976 + "value": 5834.656 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.178480924652092 + "value": 10.709511611257774 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12872.0 + "value": 2404.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1831628.0 + "value": 1835780.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0751625781954461 + "value": 1.0818129541174824 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 108330.0 + "value": 101199.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101734.0 + "value": 2103894.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4415311033885332 + "value": 0.4415511537539678 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -25785,7 +25785,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02069249537348944 + "value": 0.020690157516283985 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -25815,13 +25815,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.83479691268009 + "value": 81.83273272727429 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97294182165494 + "value": 99.95715059622778 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -25881,7 +25881,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.01085691181371 + "value": 44.01305576636728 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -25893,13 +25893,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.25980027042804 + "value": 28.261071429506924 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3415185823696748 + "value": 0.34153394427748845 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -25911,7 +25911,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.6391095724791 + "value": 94.64336570877545 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -25930,19 +25930,19 @@ "time" ], "times": { - "compilation": 16817.297, - "data": 63565.106, - "framework": 3494574.2929999996, - "kernel_overhead": 1668209.927, - "profiling_overhead": 53583.877, - "profiling_runs": 1709215.383, + "compilation_time": 24399.104, + "data": 78707.851, + "framework": 3488979.1040000003, + "kernel_overhead": 1651780.17, + "profiling_overhead": 65385.449, + "profiling_runs": 1693105.634, "runtimes": [ - 5914.976 + 5834.656 ], - "search_algorithm": 35.515, - "validation": 22.316 + "search_algorithm": 46.221, + "validation": 32.433 }, - "timestamp": "2026-03-02 14:28:25 UTC" + "timestamp": "2026-03-05 08:59:20 UTC" }, { "compilation_data": { @@ -25978,49 +25978,49 @@ { "name": "time", "unit": "", - "value": 6857.76 + "value": 6838.304 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.868628600996479 + "value": 8.943831679351948 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4596.0 + "value": 56.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872920.0 + "value": 1871068.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9244723215811693 + "value": 0.9117730596858147 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 125510.0 + "value": 115017.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108398.0 + "value": 2099184.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.984419350454 + "value": 47.98036528139767 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -26032,7 +26032,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5622013927043412 + "value": 0.5622650615557956 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -26062,13 +26062,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.42735586680055 + "value": 65.02260925906698 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95347979923307 + "value": 99.9539028794693 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -26128,7 +26128,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.37338247270446 + "value": 21.375918425583322 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -26140,13 +26140,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.998423536862138 + "value": 24.001039747935177 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.373398904625606 + "value": 24.376055993996662 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -26158,7 +26158,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.20040964654258 + "value": 47.205507498912674 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -26177,37 +26177,37 @@ "time" ], "times": { - "compilation": 40977.46, - "data": 65573.323, - "framework": 519523.851, - "kernel_overhead": 175370.554, - "profiling_overhead": 55384.183, - "profiling_runs": 223195.791, + "compilation_time": 23143.396, + "data": 77807.367, + "framework": 543511.4839999999, + "kernel_overhead": 176446.408, + "profiling_overhead": 64830.78, + "profiling_runs": 224426.929, "runtimes": [ - 6857.76 + 6838.304 ], - "search_algorithm": 31.675, - "validation": 18.574 + "search_algorithm": 47.617, + "validation": 28.578 }, - "timestamp": "2026-03-02 14:28:25 UTC" + "timestamp": "2026-03-05 08:59:21 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 32, - "z": 256 + "y": 64, + "z": 128 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 39 }, "configuration": { "INNER_UNROLL_FACTOR": "0", @@ -26215,9 +26215,9 @@ "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -26225,61 +26225,61 @@ { "name": "time", "unit": "", - "value": 7351.744 + "value": 3652.224 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.086673721051312 + "value": 17.583776723372605 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9932.0 + "value": 6388.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871708.0 + "value": 1869680.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9204799370719983 + "value": 1.71537686223448 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 124777.0 + "value": 68051.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102818.0 + "value": 2100454.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.0113898022368 + "value": 45.68764072368542 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.562439115081832 + "value": 0.5352247977907167 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -26309,13 +26309,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 77.34138581322715 + "value": 73.9308451133215 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9302381097528 + "value": 99.89637417377013 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -26327,7 +26327,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8589934592.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -26339,43 +26339,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2952790016.0 + "value": 1509949440.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 142606336.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 4311744512.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 318767104.0 + "value": 184549376.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 527958016.0 + "value": 333971456.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.38670063458577 + "value": 28.976209062225333 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -26387,13 +26387,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.014154971926803 + "value": 45.71989369573479 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.38937614336316 + "value": 23.306430184739803 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -26405,7 +26405,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.23137145942153 + "value": 56.88246048535434 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -26424,47 +26424,47 @@ "time" ], "times": { - "compilation": 17472.586, - "data": 64183.494, - "framework": 516428.356, - "kernel_overhead": 174892.757, - "profiling_overhead": 54378.069, - "profiling_runs": 222974.036, + "compilation_time": 22928.35, + "data": 77940.253, + "framework": 360267.857, + "kernel_overhead": 93488.077, + "profiling_overhead": 64768.362, + "profiling_runs": 124071.165, "runtimes": [ - 7351.744 + 3652.224 ], - "search_algorithm": 28.259, - "validation": 23.189 + "search_algorithm": 43.324, + "validation": 27.699 }, - "timestamp": "2026-03-02 14:28:26 UTC" + "timestamp": "2026-03-05 08:59:21 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 256 + "x": 16, + "y": 64, + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 40 }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -26472,61 +26472,61 @@ { "name": "time", "unit": "", - "value": 6976.0 + "value": 2251.36 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.644116301424306 + "value": 29.10725421681977 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1632.0 + "value": 7880.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838616.0 + "value": 1874060.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9146661885816736 + "value": 2.808512695675129 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 114829.0 + "value": 46562.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100167.0 + "value": 2104562.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.98473757772827 + "value": 37.926675254859596 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5621882105522493 + "value": 0.4442008794367671 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -26556,13 +26556,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.15492998046548 + "value": 95.6732455284196 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95245140329449 + "value": 99.90418523905078 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -26574,7 +26574,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8589934592.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -26586,43 +26586,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2952790016.0 + "value": 591396864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 71303168.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 4311744512.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 318767104.0 + "value": 146800640.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 527958016.0 + "value": 228327424.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.373790140964353 + "value": 43.23965031732271 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -26634,13 +26634,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.99810774703223 + "value": 75.88299052985222 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.373078180579608 + "value": 19.48947901303822 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -26652,7 +26652,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.199792659419046 + "value": 64.54588266337716 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -26671,47 +26671,47 @@ "time" ], "times": { - "compilation": 19606.853, - "data": 64840.07, - "framework": 520539.77800000005, - "kernel_overhead": 176220.746, - "profiling_overhead": 54907.267, - "profiling_runs": 224571.695, + "compilation_time": 23380.564, + "data": 79301.446, + "framework": 235776.941, + "kernel_overhead": 33754.572, + "profiling_overhead": 65442.291, + "profiling_runs": 57278.632, "runtimes": [ - 6976.0 + 2251.36 ], - "search_algorithm": 29.386, - "validation": 27.52 + "search_algorithm": 36.645, + "validation": 24.078 }, - "timestamp": "2026-03-02 14:28:26 UTC" + "timestamp": "2026-03-05 08:59:21 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 64, - "z": 256 + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 40 }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -26719,61 +26719,61 @@ { "name": "time", "unit": "", - "value": 6989.568 + "value": 2009.088 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.915154728243795 + "value": 32.736522346368716 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16336.0 + "value": 3516.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1843148.0 + "value": 1871632.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9274240800897757 + "value": 3.1707439514822613 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 133058.0 + "value": 37526.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2110037.0 + "value": 2099558.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.01459473779753 + "value": 21.53581128767231 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5624917818316483 + "value": 0.2521758581839152 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -26803,13 +26803,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 80.66695814866613 + "value": 97.98813474912558 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93940021718852 + "value": 99.89026307310597 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -26821,7 +26821,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8589934592.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -26833,43 +26833,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2952790016.0 + "value": 564133888.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 69206016.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 4311744512.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 318767104.0 + "value": 115343360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 527958016.0 + "value": 192282624.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.387127921726254 + "value": 43.63178340669749 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -26881,13 +26881,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.01420191235312 + "value": 86.17058721440534 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.389423817233634 + "value": 11.23415370422179 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -26899,7 +26899,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.23146378249991 + "value": 61.725691971415586 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -26918,47 +26918,47 @@ "time" ], "times": { - "compilation": 20670.828, - "data": 63792.536, - "framework": 519296.566, - "kernel_overhead": 176870.958, - "profiling_overhead": 53785.059, - "profiling_runs": 224848.013, + "compilation_time": 23496.191, + "data": 77507.701, + "framework": 228335.039, + "kernel_overhead": 32061.853, + "profiling_overhead": 64488.781, + "profiling_runs": 54276.704, "runtimes": [ - 6989.568 + 2009.088 ], - "search_algorithm": 30.822, - "validation": 23.317 + "search_algorithm": 47.791, + "validation": 28.562 }, - "timestamp": "2026-03-02 14:28:26 UTC" + "timestamp": "2026-03-05 08:59:21 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 32, - "z": 256 + "x": 16, + "y": 64, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 8, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 40 }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -26966,61 +26966,61 @@ { "name": "time", "unit": "", - "value": 7272.128 + "value": 1881.568 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.0130426458042505 + "value": 35.0687637467725 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 11380.0 + "value": 6568.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840548.0 + "value": 1871008.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9246018763390682 + "value": 3.378528909941584 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 123697.0 + "value": 37575.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101652.0 + "value": 2100390.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.271523290418806 + "value": 11.45270438714915 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5655291938395839 + "value": 0.13411828036090565 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -27050,13 +27050,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.94260047624633 + "value": 98.25203583415436 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96757013686113 + "value": 99.9069409811896 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -27068,7 +27068,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 8589934592.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -27080,43 +27080,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2952790016.0 + "value": 209715200.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 67108864.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 4311744512.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 318767104.0 + "value": 558891008.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 50331648.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 527958016.0 + "value": 194805760.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.497111321867386 + "value": 37.78954299449883 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -27128,13 +27128,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.1370732236344 + "value": 91.64336180634677 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.51421499275369 + "value": 6.1528135978382235 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -27146,7 +27146,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.47316833723221 + "value": 66.50787591139473 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -27165,19 +27165,19 @@ "time" ], "times": { - "compilation": 23126.145, - "data": 64273.489, - "framework": 518086.995, - "kernel_overhead": 175597.522, - "profiling_overhead": 54436.64, - "profiling_runs": 223779.344, + "compilation_time": 23565.287, + "data": 78677.587, + "framework": 237094.80099999998, + "kernel_overhead": 35672.401, + "profiling_overhead": 65580.285, + "profiling_runs": 57164.528, "runtimes": [ - 7272.128 + 1881.568 ], - "search_algorithm": 44.582, - "validation": 25.374 + "search_algorithm": 35.836, + "validation": 30.09 }, - "timestamp": "2026-03-02 14:28:27 UTC" + "timestamp": "2026-03-05 08:59:22 UTC" }, { "compilation_data": { @@ -27185,7 +27185,7 @@ "global_size": { "x": 16, "y": 64, - "z": 128 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -27195,7 +27195,7 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 39 + "registers": 48 }, "configuration": { "INNER_UNROLL_FACTOR": "0", @@ -27205,7 +27205,7 @@ "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -27213,61 +27213,61 @@ { "name": "time", "unit": "", - "value": 3609.792 + "value": 1811.744 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.67273794716617 + "value": 35.950265145922536 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10984.0 + "value": 1508.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1874480.0 + "value": 1868504.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.7219822615106954 + "value": 3.4884482477989205 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 76147.0 + "value": 29927.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102301.0 + "value": 2099302.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.68627062936657 + "value": 5.931196964691672 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5352176001133331 + "value": 0.06944808665095645 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -27297,13 +27297,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.27188227072129 + "value": 81.69814408800968 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90380375730335 + "value": 99.88874205144946 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -27315,7 +27315,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6442450944.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -27327,43 +27327,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1509949440.0 + "value": 138412032.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 142606336.0 + "value": 33554432.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2164260864.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 184549376.0 + "value": 283639808.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 333971456.0 + "value": 165953536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.973583641123852 + "value": 37.64608182096902 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -27375,13 +27375,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.7158788340919 + "value": 94.92539969312452 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.30438354628513 + "value": 3.37198380257559 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -27393,7 +27393,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.87755275685799 + "value": 58.686422969761196 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -27412,45 +27412,45 @@ "time" ], "times": { - "compilation": 69863.99, - "data": 62028.658, - "framework": 332236.194, - "kernel_overhead": 93690.086, - "profiling_overhead": 52143.289, - "profiling_runs": 124374.161, + "compilation_time": 24273.365, + "data": 77696.767, + "framework": 209756.816, + "kernel_overhead": 23103.973, + "profiling_overhead": 64590.114, + "profiling_runs": 44365.962, "runtimes": [ - 3609.792 + 1811.744 ], - "search_algorithm": 36.286, - "validation": 30.05 + "search_algorithm": 42.621, + "validation": 23.942 }, - "timestamp": "2026-03-02 14:28:27 UTC" + "timestamp": "2026-03-05 08:59:22 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 32, + "y": 64, "z": 128 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 39 + "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "2" }, @@ -27460,49 +27460,49 @@ { "name": "time", "unit": "", - "value": 3731.328 + "value": 6219.296 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.566689944134078 + "value": 9.706120208990015 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 13832.0 + "value": 5160.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1882292.0 + "value": 1870988.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.7533247092169362 + "value": 0.9985791993776746 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 78103.0 + "value": 109368.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2142399.0 + "value": 2100326.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.731449886225505 + "value": 26.28561285914418 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -27514,7 +27514,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5357083301817018 + "value": 0.3079684004070406 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -27544,13 +27544,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.99812484763592 + "value": 98.02300693607765 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9389231183016 + "value": 99.97085726752981 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -27562,7 +27562,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6442450944.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -27574,13 +27574,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1509949440.0 + "value": 10880024576.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 142606336.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -27592,7 +27592,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 184549376.0 + "value": 2248146944.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -27604,13 +27604,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 333971456.0 + "value": 816054272.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.99049548125157 + "value": 43.02510372501284 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -27622,13 +27622,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.74171512207048 + "value": 26.28763110207229 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.31755399777421 + "value": 13.400530698517318 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -27640,7 +27640,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.909727556666276 + "value": 79.91571439953724 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -27659,47 +27659,47 @@ "time" ], "times": { - "compilation": 26875.376, - "data": 62327.017, - "framework": 334778.735, - "kernel_overhead": 94815.817, - "profiling_overhead": 52303.654, - "profiling_runs": 125332.247, + "compilation_time": 23506.024, + "data": 77793.292, + "framework": 2192436.9220000003, + "kernel_overhead": 1002712.253, + "profiling_overhead": 64599.224, + "profiling_runs": 1047332.153, "runtimes": [ - 3731.328 + 6219.296 ], - "search_algorithm": 36.55, - "validation": 25.646 + "search_algorithm": 47.317, + "validation": 30.109 }, - "timestamp": "2026-03-02 14:28:27 UTC" + "timestamp": "2026-03-05 08:59:23 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 128 + "x": 16, + "y": 64, + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 39 + "registers": 25 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -27707,61 +27707,61 @@ { "name": "time", "unit": "", - "value": 3612.32 + "value": 6230.464 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.333346870010768 + "value": 10.348258072902029 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 884.0 + "value": 4740.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837744.0 + "value": 1880504.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.7060066325294407 + "value": 1.043259619567179 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 61013.0 + "value": 111197.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2097343.0 + "value": 2139178.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.68644386944465 + "value": 13.384641876047414 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5352252640437013 + "value": 0.15681331055065298 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -27791,13 +27791,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 74.35289880582513 + "value": 98.85156697691373 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89440637358206 + "value": 99.97233456374398 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -27809,7 +27809,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6442450944.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -27821,43 +27821,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1509949440.0 + "value": 10854858752.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 142606336.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2164260864.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 184549376.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 333971456.0 + "value": 837287936.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.976198239327367 + "value": 36.23404201167492 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -27869,13 +27869,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.72083415187201 + "value": 26.77021109633123 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.30690959695038 + "value": 6.875552263999135 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -27887,7 +27887,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.883718612170966 + "value": 83.50037668361028 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -27906,47 +27906,47 @@ "time" ], "times": { - "compilation": 28474.59, - "data": 61982.488, - "framework": 335090.447, - "kernel_overhead": 95164.305, - "profiling_overhead": 52130.775, - "profiling_runs": 125812.879, + "compilation_time": 23758.457, + "data": 76694.62, + "framework": 2768185.393, + "kernel_overhead": 1292173.642, + "profiling_overhead": 63650.278, + "profiling_runs": 1335666.853, "runtimes": [ - 3612.32 + 6230.464 ], - "search_algorithm": 38.232, - "validation": 24.424 + "search_algorithm": 46.036, + "validation": 31.555 }, - "timestamp": "2026-03-02 14:28:27 UTC" + "timestamp": "2026-03-05 08:59:24 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 64, - "z": 128 + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 39 + "private_memory_size": 32, + "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -27954,61 +27954,61 @@ { "name": "time", "unit": "", - "value": 3608.064 + "value": 8069.248 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.342654873834034 + "value": 7.565581798672456 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 440.0 + "value": 5436.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837832.0 + "value": 1934936.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.710952072827599 + "value": 48.79736433816041 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 62018.0 + "value": 847850.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099165.0 + "value": 138415199.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.73453935989875 + "value": 5.139789570159697 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5357120377771154 + "value": 0.05983973149257982 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -28026,25 +28026,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.89972194256015 + "value": 94.2039972968962 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93868989785754 + "value": 99.68922544314479 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -28056,7 +28056,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6442450944.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -28068,43 +28068,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1509949440.0 + "value": 4250927104.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 142606336.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2164260864.0 + "value": 2722103296.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 184549376.0 + "value": 289406976.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 333971456.0 + "value": 420413440.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.990891918773894 + "value": 12.314619525187922 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -28116,13 +28116,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.74213844211486 + "value": 20.488969520365025 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.317769791781206 + "value": 13.015697922849071 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -28134,7 +28134,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.910254231897625 + "value": 32.08933150805714 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -28153,47 +28153,47 @@ "time" ], "times": { - "compilation": 29599.36, - "data": 61802.434, - "framework": 331777.004, - "kernel_overhead": 93760.133, - "profiling_overhead": 51903.502, - "profiling_runs": 124310.935, + "compilation_time": 23476.114, + "data": 77612.241, + "framework": 953892.8030000001, + "kernel_overhead": 378516.706, + "profiling_overhead": 64645.08, + "profiling_runs": 433118.776, "runtimes": [ - 3608.064 + 8069.248 ], - "search_algorithm": 41.12, - "validation": 25.888 + "search_algorithm": 45.023, + "validation": 26.201 }, - "timestamp": "2026-03-02 14:28:28 UTC" + "timestamp": "2026-03-05 08:59:25 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 32, - "z": 128 + "x": 16, + "y": 64, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 8, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 39 + "private_memory_size": 64, + "registers": 34 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -28201,61 +28201,61 @@ { "name": "time", "unit": "", - "value": 3614.752 + "value": 8128.928 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.338660453554136 + "value": 7.931978389342414 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6928.0 + "value": 6388.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840628.0 + "value": 2020848.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.7196334912365066 + "value": 54.2846779232238 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 68891.0 + "value": 17099308.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106478.0 + "value": 138415166.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.724493631838364 + "value": 2.547041290054688 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5356029333283208 + "value": 0.029837970206029144 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -28273,25 +28273,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.71807619170482 + "value": 88.57056363845184 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92591712140666 + "value": 99.13020392484891 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -28303,7 +28303,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6442450944.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -28315,43 +28315,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1509949440.0 + "value": 3769630720.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 142606336.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2164260864.0 + "value": 2453667840.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 184549376.0 + "value": 144703488.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 333971456.0 + "value": 379617280.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.988921838549814 + "value": 10.087630709737056 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -28363,13 +28363,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.738668166024354 + "value": 20.548114352807506 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.31600076432101 + "value": 11.753962873200193 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -28381,7 +28381,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.90597450133772 + "value": 29.059115540481677 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -28400,19 +28400,19 @@ "time" ], "times": { - "compilation": 29521.183, - "data": 61763.649, - "framework": 333966.066, - "kernel_overhead": 94999.219, - "profiling_overhead": 51639.092, - "profiling_runs": 125564.106, + "compilation_time": 23237.85, + "data": 78323.643, + "framework": 925576.61, + "kernel_overhead": 363636.012, + "profiling_overhead": 65227.697, + "profiling_runs": 418389.258, "runtimes": [ - 3614.752 + 8128.928 ], - "search_algorithm": 40.285, - "validation": 25.714 + "search_algorithm": 43.257, + "validation": 34.214 }, - "timestamp": "2026-03-02 14:28:28 UTC" + "timestamp": "2026-03-05 08:59:25 UTC" }, { "compilation_data": { @@ -28420,7 +28420,7 @@ "global_size": { "x": 16, "y": 64, - "z": 64 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -28429,18 +28429,18 @@ "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 40 + "private_memory_size": 128, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -28448,61 +28448,61 @@ { "name": "time", "unit": "", - "value": 2158.432 + "value": 9271.264 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 22.589428765097363 + "value": 8.361336209678164 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5176.0 + "value": 23072.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871720.0 + "value": 2298752.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.813253982496849 + "value": 74.47694099175209 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 44059.0 + "value": 106865410.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100164.0 + "value": 138426126.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.922619458386436 + "value": 1.1190299254804366 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.4441424780009324 + "value": 0.012841988486605517 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -28520,25 +28520,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.7460904306492 + "value": 92.22626648213634 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90196987246175 + "value": 96.9623836936803 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -28550,7 +28550,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5368709120.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -28562,43 +28562,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 591396864.0 + "value": 3528982528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 71303168.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 146800640.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 228327424.0 + "value": 359219200.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.23505892934158 + "value": 7.884199828722608 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -28610,13 +28610,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.87469630705151 + "value": 18.082883566927187 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.487348758549363 + "value": 9.772085638523762 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -28628,7 +28628,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.53897290293878 + "value": 24.19869194179137 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -28647,45 +28647,45 @@ "time" ], "times": { - "compilation": 89996.223, - "data": 61670.272, - "framework": 207894.32799999998, - "kernel_overhead": 35663.58, - "profiling_overhead": 51169.338, - "profiling_runs": 59391.138, + "compilation_time": 23628.984, + "data": 78136.155, + "framework": 907625.223, + "kernel_overhead": 351026.608, + "profiling_overhead": 64951.105, + "profiling_runs": 413511.355, "runtimes": [ - 2158.432 + 9271.264 ], - "search_algorithm": 39.581, - "validation": 27.727 + "search_algorithm": 44.972, + "validation": 31.883 }, - "timestamp": "2026-03-02 14:28:28 UTC" + "timestamp": "2026-03-05 08:59:26 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 32, + "y": 64, "z": 64 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 26 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "4" }, @@ -28695,49 +28695,49 @@ { "name": "time", "unit": "", - "value": 2198.208 + "value": 3974.368 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 22.41712786259542 + "value": 16.10353470225422 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5464.0 + "value": 5812.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1873988.0 + "value": 1872360.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.822642145390205 + "value": 1.5722327765400093 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 46225.0 + "value": 72505.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104490.0 + "value": 2100130.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.92319592697868 + "value": 20.896644579786024 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -28749,7 +28749,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.44413108047409044 + "value": 0.24481447752490332 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -28779,13 +28779,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.78152050249173 + "value": 98.71323469844259 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90282656018074 + "value": 99.9660497165348 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -28797,7 +28797,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5368709120.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -28809,13 +28809,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 591396864.0 + "value": 5221908480.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 71303168.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -28827,7 +28827,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 146800640.0 + "value": 2193620992.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -28839,13 +28839,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 228327424.0 + "value": 535298048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.23406626030107 + "value": 39.61192487478861 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -28857,13 +28857,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.87209859564655 + "value": 41.79586064408222 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.48668157290531 + "value": 10.73467905214221 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -28875,7 +28875,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.53681868965265 + "value": 83.34741284248946 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -28894,47 +28894,47 @@ "time" ], "times": { - "compilation": 30375.567, - "data": 62044.227, - "framework": 209254.79, - "kernel_overhead": 36200.66, - "profiling_overhead": 51449.713, - "profiling_runs": 59560.19, + "compilation_time": 23005.766, + "data": 78419.209, + "framework": 1408039.665, + "kernel_overhead": 616126.989, + "profiling_overhead": 65189.269, + "profiling_runs": 648304.198, "runtimes": [ - 2198.208 + 3974.368 ], - "search_algorithm": 38.897, - "validation": 28.591 + "search_algorithm": 52.353, + "validation": 28.92 }, - "timestamp": "2026-03-02 14:28:28 UTC" + "timestamp": "2026-03-05 08:59:27 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 64 + "x": 16, + "y": 64, + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 30 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -28942,61 +28942,61 @@ { "name": "time", "unit": "", - "value": 2221.344 + "value": 4189.664 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 21.860583510962243 + "value": 15.160931210838624 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 604.0 + "value": 208.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837224.0 + "value": 1870452.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.786079819724869 + "value": 1.4753196535093334 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 37488.0 + "value": 70909.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2097435.0 + "value": 2099014.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.92201729243877 + "value": 9.818873582111486 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.4440923974565519 + "value": 0.11503670539668939 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -29026,13 +29026,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.05945442013373 + "value": 98.9485235904877 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89472857797642 + "value": 99.96939773678764 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -29044,7 +29044,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5368709120.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -29056,43 +29056,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 591396864.0 + "value": 5863636992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 71303168.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 146800640.0 + "value": 1373634560.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 228327424.0 + "value": 621477888.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.23386730387939 + "value": 43.266983540925125 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -29104,13 +29104,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.87164032059628 + "value": 39.27788202957959 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.48656387140315 + "value": 5.12070044037976 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -29122,7 +29122,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.53637828756918 + "value": 90.9360298967861 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -29141,47 +29141,47 @@ "time" ], "times": { - "compilation": 30133.442, - "data": 61238.729, - "framework": 206991.187, - "kernel_overhead": 35625.614, - "profiling_overhead": 50699.321, - "profiling_runs": 59427.523, + "compilation_time": 23162.434, + "data": 78210.365, + "framework": 1914000.427, + "kernel_overhead": 869198.536, + "profiling_overhead": 64361.405, + "profiling_runs": 902230.121, "runtimes": [ - 2221.344 + 4189.664 ], - "search_algorithm": 40.259, - "validation": 25.103 + "search_algorithm": 46.706, + "validation": 34.87 }, - "timestamp": "2026-03-02 14:28:28 UTC" + "timestamp": "2026-03-05 08:59:28 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 64, - "z": 64 + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -29189,61 +29189,61 @@ { "name": "time", "unit": "", - "value": 2208.672 + "value": 6884.192 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 22.075487410788128 + "value": 9.414195238822497 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1348.0 + "value": 18164.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1843640.0 + "value": 1875472.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.8358902756560203 + "value": 0.9398529123279893 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 37998.0 + "value": 132228.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2120927.0 + "value": 2108635.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.923868424383805 + "value": 3.0412393122125994 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.4441155101504952 + "value": 0.03563363094432863 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -29273,13 +29273,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.04745675686571 + "value": 98.85747023008193 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89049050720264 + "value": 99.98186238615355 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -29291,7 +29291,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5368709120.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -29303,43 +29303,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 591396864.0 + "value": 8738832384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 71303168.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 146800640.0 + "value": 1157627904.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 228327424.0 + "value": 970653696.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.23719507818382 + "value": 36.477650768245375 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -29351,13 +29351,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.87880823038496 + "value": 24.330304994762624 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.488404848233635 + "value": 1.6335043636620412 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -29369,7 +29369,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.54252590598912 + "value": 87.9778570826454 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -29388,47 +29388,47 @@ "time" ], "times": { - "compilation": 29996.506, - "data": 61573.195, - "framework": 206753.488, - "kernel_overhead": 35366.118, - "profiling_overhead": 50734.227, - "profiling_runs": 59079.948, + "compilation_time": 23297.47, + "data": 77793.191, + "framework": 3137646.5700000003, + "kernel_overhead": 1474411.462, + "profiling_overhead": 64708.816, + "profiling_runs": 1520733.101, "runtimes": [ - 2208.672 + 6884.192 ], - "search_algorithm": 41.103, - "validation": 28.744 + "search_algorithm": 45.189, + "validation": 27.508 }, - "timestamp": "2026-03-02 14:28:29 UTC" + "timestamp": "2026-03-05 08:59:29 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 32, - "z": 64 + "x": 16, + "y": 64, + "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 8, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 40 + "private_memory_size": 128, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -29436,61 +29436,61 @@ { "name": "time", "unit": "", - "value": 2197.568 + "value": 9372.608 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 21.848432669576287 + "value": 8.149235813789296 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5252.0 + "value": 27184.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840168.0 + "value": 2277592.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.831065951788857 + "value": 81.86373883703486 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 40832.0 + "value": 133176230.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102356.0 + "value": 138429726.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.98765388840588 + "value": 1.117854555393024 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.4447545082156342 + "value": 0.01276277179235494 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -29508,25 +29508,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.47758596609326 + "value": 98.89073272623502 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.87374468284366 + "value": 97.87694782352884 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -29538,7 +29538,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5368709120.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -29550,43 +29550,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 591396864.0 + "value": 1918369792.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 71303168.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 1245708288.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 146800640.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 228327424.0 + "value": 258555904.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.307155872731926 + "value": 7.758059622644825 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -29598,13 +29598,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.00072435771385 + "value": 17.80341351187871 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.519717291092523 + "value": 5.170205169038019 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -29616,7 +29616,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.64622778257359 + "value": 17.148417865654356 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -29635,19 +29635,19 @@ "time" ], "times": { - "compilation": 29907.214, - "data": 61476.263, - "framework": 207324.82400000002, - "kernel_overhead": 35645.678, - "profiling_overhead": 51069.523, - "profiling_runs": 59133.36, + "compilation_time": 40445.627, + "data": 78500.053, + "framework": 587405.115, + "kernel_overhead": 190423.082, + "profiling_overhead": 65360.741, + "profiling_runs": 253121.239, "runtimes": [ - 2197.568 + 9372.608 ], - "search_algorithm": 38.556, - "validation": 26.658 + "search_algorithm": 53.216, + "validation": 25.378 }, - "timestamp": "2026-03-02 14:28:29 UTC" + "timestamp": "2026-03-05 08:59:30 UTC" }, { "compilation_data": { @@ -29665,10 +29665,10 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", @@ -29683,49 +29683,49 @@ { "name": "time", "unit": "", - "value": 1904.224 + "value": 2808.512 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 25.728481407866038 + "value": 22.83924725309027 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 336.0 + "value": 3080.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871940.0 + "value": 1870016.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.2027139801926654 + "value": 2.213543553120114 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 34742.0 + "value": 50927.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102529.0 + "value": 2100006.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.532818952394678 + "value": 14.86013311184577 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -29737,7 +29737,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2521254360977023 + "value": 0.17407550014832116 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -29767,13 +29767,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.03762980334112 + "value": 98.80407288238271 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89752210534589 + "value": 99.95424616971567 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -29785,7 +29785,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4831838208.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -29797,13 +29797,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 564133888.0 + "value": 3042967552.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 69206016.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -29815,7 +29815,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 115343360.0 + "value": 1644167168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -29827,13 +29827,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 192282624.0 + "value": 399179776.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.61988335619086 + "value": 45.94868359175196 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -29845,13 +29845,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.14709725291948 + "value": 59.44496906755335 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.231091292250733 + "value": 7.749905635271849 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -29863,7 +29863,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 61.70903862595839 + "value": 88.39892525202075 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -29882,47 +29882,47 @@ "time" ], "times": { - "compilation": 92478.145, - "data": 60671.582, - "framework": 203472.038, - "kernel_overhead": 34987.875, - "profiling_overhead": 50519.939, - "profiling_runs": 57292.642, + "compilation_time": 24516.89, + "data": 76415.591, + "framework": 1185206.882, + "kernel_overhead": 509670.947, + "profiling_overhead": 63125.336, + "profiling_runs": 535995.008, "runtimes": [ - 1904.224 + 2808.512 ], - "search_algorithm": 37.246, - "validation": 27.83 + "search_algorithm": 46.881, + "validation": 25.841 }, - "timestamp": "2026-03-02 14:28:29 UTC" + "timestamp": "2026-03-05 08:59:30 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 32, - "z": 32 + "y": 64, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -29930,61 +29930,61 @@ { "name": "time", "unit": "", - "value": 1938.496 + "value": 3469.856 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 25.44638197947469 + "value": 18.575869187447676 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3116.0 + "value": 4064.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869412.0 + "value": 1870476.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.181874046185721 + "value": 1.8081179990979117 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 35622.0 + "value": 61419.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098581.0 + "value": 2100454.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.53388770603989 + "value": 6.037895557232067 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2522032378807293 + "value": 0.07073613183538675 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -30014,13 +30014,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.64385334531833 + "value": 98.72127137578069 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92203311768397 + "value": 99.9677258843067 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -30032,7 +30032,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4831838208.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -30044,43 +30044,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 564133888.0 + "value": 3301965824.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 69206016.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 115343360.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 192282624.0 + "value": 513540096.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.62319308319583 + "value": 42.22546223849266 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -30092,13 +30092,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.1525423145676 + "value": 48.30478927653387 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.231801170893336 + "value": 3.243119397228226 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -30110,7 +30110,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 61.7129900719678 + "value": 92.41184901988609 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -30129,47 +30129,47 @@ "time" ], "times": { - "compilation": 29357.396, - "data": 61101.375, - "framework": 203570.788, - "kernel_overhead": 34707.733, - "profiling_overhead": 50720.061, - "profiling_runs": 57041.619, + "compilation_time": 24854.452, + "data": 79090.156, + "framework": 1866341.435, + "kernel_overhead": 845952.838, + "profiling_overhead": 65870.211, + "profiling_runs": 875428.23, "runtimes": [ - 1938.496 + 3469.856 ], - "search_algorithm": 39.914, - "validation": 36.627 + "search_algorithm": 45.143, + "validation": 28.093 }, - "timestamp": "2026-03-02 14:28:29 UTC" + "timestamp": "2026-03-05 08:59:31 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 32 + "x": 16, + "y": 64, + "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -30177,61 +30177,61 @@ { "name": "time", "unit": "", - "value": 1936.8 + "value": 5622.112 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 24.807319599147416 + "value": 11.478284904543898 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6712.0 + "value": 15884.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840572.0 + "value": 1871124.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.1882705726073115 + "value": 1.1375251465747094 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 39513.0 + "value": 108800.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102964.0 + "value": 2106172.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.5335759407197 + "value": 1.8629646311329775 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.25216783690355626 + "value": 0.02182724601615458 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -30261,13 +30261,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.90255884991794 + "value": 73.90260667447761 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91015507562675 + "value": 99.97456874047766 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -30279,7 +30279,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4831838208.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -30291,43 +30291,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 564133888.0 + "value": 4806148096.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 69206016.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 115343360.0 + "value": 1222115328.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 192282624.0 + "value": 868564992.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.621865086697966 + "value": 42.34648464678536 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -30339,13 +30339,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.1506903523413 + "value": 29.80904737692261 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.231559728552307 + "value": 1.0588907210308203 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -30357,7 +30357,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 61.7116031340357 + "value": 96.4523744806733 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -30376,47 +30376,47 @@ "time" ], "times": { - "compilation": 30126.4, - "data": 60269.897, - "framework": 201872.278, - "kernel_overhead": 34666.708, - "profiling_overhead": 49775.628, - "profiling_runs": 57160.045, + "compilation_time": 23752.737, + "data": 78275.375, + "framework": 3331901.351, + "kernel_overhead": 1574261.209, + "profiling_overhead": 65167.57, + "profiling_runs": 1614197.197, "runtimes": [ - 1936.8 + 5622.112 ], - "search_algorithm": 42.883, - "validation": 26.109 + "search_algorithm": 45.273, + "validation": 28.349 }, - "timestamp": "2026-03-02 14:28:29 UTC" + "timestamp": "2026-03-05 08:59:33 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 64, - "z": 32 + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -30424,61 +30424,61 @@ { "name": "time", "unit": "", - "value": 1958.016 + "value": 3345.248 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 24.60400787818716 + "value": 19.243322083562255 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3324.0 + "value": 11340.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839528.0 + "value": 1871636.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.1523016100213512 + "value": 1.8699403382849737 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 36144.0 + "value": 66733.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100650.0 + "value": 2101228.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.533421166963688 + "value": 6.227395729805341 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2521724684838284 + "value": 0.07294971409031327 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -30508,13 +30508,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.60024435546994 + "value": 98.73949215200489 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90478535891327 + "value": 99.95563439031659 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -30526,7 +30526,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4831838208.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -30538,43 +30538,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 564133888.0 + "value": 2563768320.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 69206016.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 115343360.0 + "value": 1362100224.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 192282624.0 + "value": 484179968.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.62444059290934 + "value": 42.773779162971366 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -30586,13 +30586,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.15690322866071 + "value": 49.82244218255398 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.232369708033403 + "value": 3.3450125977056504 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -30604,7 +30604,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 61.71611389212334 + "value": 89.86594019005594 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -30623,47 +30623,47 @@ "time" ], "times": { - "compilation": 30348.25, - "data": 60567.716, - "framework": 202683.219, - "kernel_overhead": 34760.182, - "profiling_overhead": 50262.53, - "profiling_runs": 57092.791, + "compilation_time": 24416.304, + "data": 77202.174, + "framework": 1737263.253, + "kernel_overhead": 783892.958, + "profiling_overhead": 63493.578, + "profiling_runs": 812674.543, "runtimes": [ - 1958.016 + 3345.248 ], - "search_algorithm": 42.363, - "validation": 27.339 + "search_algorithm": 49.145, + "validation": 26.864 }, - "timestamp": "2026-03-02 14:28:29 UTC" + "timestamp": "2026-03-05 08:59:34 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 32, - "z": 32 + "x": 16, + "y": 64, + "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 8, + "x": 16, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -30671,61 +30671,61 @@ { "name": "time", "unit": "", - "value": 1918.336 + "value": 6299.328 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 25.042953273788836 + "value": 10.209754037516996 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5268.0 + "value": 13740.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839432.0 + "value": 1870056.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.2372357191619425 + "value": 1.015314226136501 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 36020.0 + "value": 115303.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100259.0 + "value": 2104184.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.790411282645778 + "value": 1.6607401401324724 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2550459422051532 + "value": 0.0194570587558522 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -30755,13 +30755,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.06383546105208 + "value": 82.03436036350742 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.84282590146381 + "value": 99.96723964442859 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -30773,7 +30773,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4831838208.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -30785,43 +30785,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 564133888.0 + "value": 7219970048.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 69206016.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 115343360.0 + "value": 551026688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 192282624.0 + "value": 952385536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.1497515658731 + "value": 43.247834443875774 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -30833,13 +30833,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.19272598708497 + "value": 26.57407664999056 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.367411053980314 + "value": 0.9439765997494206 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -30851,7 +30851,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 62.458096866619314 + "value": 94.28279058433442 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -30870,19 +30870,19 @@ "time" ], "times": { - "compilation": 30195.478, - "data": 59900.917, - "framework": 204887.81999999998, - "kernel_overhead": 36456.096, - "profiling_overhead": 49656.147, - "profiling_runs": 58874.66, + "compilation_time": 23571.895, + "data": 76591.537, + "framework": 3684132.2350000003, + "kernel_overhead": 1750433.176, + "profiling_overhead": 63351.996, + "profiling_runs": 1793755.526, "runtimes": [ - 1918.336 + 6299.328 ], - "search_algorithm": 39.279, - "validation": 30.274 + "search_algorithm": 53.185, + "validation": 26.114 }, - "timestamp": "2026-03-02 14:28:30 UTC" + "timestamp": "2026-03-05 08:59:36 UTC" }, { "compilation_data": { @@ -30890,7 +30890,7 @@ "global_size": { "x": 16, "y": 64, - "z": 16 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -30900,17 +30900,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -30918,61 +30918,61 @@ { "name": "time", "unit": "", - "value": 1792.16 + "value": 5934.016 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.42160030298496 + "value": 10.770157098724763 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2076.0 + "value": 12616.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1874632.0 + "value": 1870552.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3991143332631943 + "value": 1.0668700972522296 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 32389.0 + "value": 108702.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099677.0 + "value": 2101509.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.450524359286334 + "value": 1.7422890190660063 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1340715561105501 + "value": 0.020412495876137613 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -31002,13 +31002,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.22226826586738 + "value": 82.0262258476386 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89474515439014 + "value": 99.97345476426125 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -31020,7 +31020,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4563402752.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -31032,43 +31032,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 209715200.0 + "value": 5944901632.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 101187584.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 558891008.0 + "value": 1054343168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 194805760.0 + "value": 913571840.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.78117175859359 + "value": 44.0617771643758 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -31080,13 +31080,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.6226195515549 + "value": 27.877261120901935 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.151420990399805 + "value": 0.9902689192117266 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -31098,7 +31098,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.49300398055207 + "value": 94.98614256816603 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -31117,19 +31117,19 @@ "time" ], "times": { - "compilation": 109746.204, - "data": 59160.057, - "framework": 207316.725, - "kernel_overhead": 38741.543, - "profiling_overhead": 49074.702, - "profiling_runs": 60340.423, + "compilation_time": 23440.068, + "data": 77234.746, + "framework": 3597758.283, + "kernel_overhead": 1707234.07, + "profiling_overhead": 64199.72, + "profiling_runs": 1749089.747, "runtimes": [ - 1792.16 + 5934.016 ], - "search_algorithm": 39.925, - "validation": 29.315 + "search_algorithm": 53.649, + "validation": 29.8 }, - "timestamp": "2026-03-02 14:28:30 UTC" + "timestamp": "2026-03-05 08:59:38 UTC" }, { "compilation_data": { @@ -31137,7 +31137,7 @@ "global_size": { "x": 16, "y": 32, - "z": 16 + "z": 256 }, "local_memory_size": 0, "local_size": { @@ -31147,7 +31147,7 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 38 }, "configuration": { "INNER_UNROLL_FACTOR": "0", @@ -31157,7 +31157,7 @@ "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "1" }, "correctness": 1, "invalidity": "correct", @@ -31165,61 +31165,61 @@ { "name": "time", "unit": "", - "value": 1823.776 + "value": 6841.664 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 26.992800324077482 + "value": 9.343634285678053 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3888.0 + "value": 14672.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872496.0 + "value": 1871588.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.387804347016584 + "value": 0.926818620888463 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 35765.0 + "value": 129072.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103570.0 + "value": 2102179.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.44987121602905 + "value": 48.01261161198408 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 134217728.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.13408258671148598 + "value": 0.5624837838318012 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -31249,13 +31249,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.15423117489877 + "value": 82.07431401675503 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90367860726951 + "value": 99.93492719378561 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -31267,7 +31267,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4563402752.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -31279,43 +31279,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 209715200.0 + "value": 2952790016.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 4311744512.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 558891008.0 + "value": 318767104.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 50331648.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 194805760.0 + "value": 527958016.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.78100318369529 + "value": 21.387068601207 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -31327,13 +31327,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.62196408022353 + "value": 24.01493530246874 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.151376982925164 + "value": 24.390168666569814 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -31345,7 +31345,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.49258562883733 + "value": 47.23289128404697 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -31364,47 +31364,47 @@ "time" ], "times": { - "compilation": 30993.851, - "data": 59401.159, - "framework": 206681.46600000001, - "kernel_overhead": 38018.554, - "profiling_overhead": 49288.255, - "profiling_runs": 59973.498, + "compilation_time": 23885.913, + "data": 78423.869, + "framework": 542079.759, + "kernel_overhead": 175334.942, + "profiling_overhead": 65430.743, + "profiling_runs": 222890.205, "runtimes": [ - 1823.776 + 6841.664 ], - "search_algorithm": 37.513, - "validation": 26.084 + "search_algorithm": 43.587, + "validation": 33.794 }, - "timestamp": "2026-03-02 14:28:30 UTC" + "timestamp": "2026-03-05 08:59:38 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 16 + "x": 16, + "y": 32, + "z": 128 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 40 + "registers": 39 }, "configuration": { "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -31412,61 +31412,61 @@ { "name": "time", "unit": "", - "value": 1844.992 + "value": 3655.872 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 26.483211527854383 + "value": 17.655507844360216 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6104.0 + "value": 9828.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837636.0 + "value": 1874300.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.365114432125456 + "value": 1.7245464712907321 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 36371.0 + "value": 73623.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099035.0 + "value": 2109065.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.449687274077688 + "value": 45.73379134048475 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1340634086756307 + "value": 0.5357223726053081 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -31496,13 +31496,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.19684405698497 + "value": 93.42922150455854 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.88597402772389 + "value": 99.94332534523667 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -31514,7 +31514,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4563402752.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -31526,43 +31526,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 209715200.0 + "value": 1509949440.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 142606336.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 558891008.0 + "value": 184549376.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 194805760.0 + "value": 333971456.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.78116105674005 + "value": 28.9896728953351 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -31574,13 +31574,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.62509672995915 + "value": 45.74089929241257 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.151587304867863 + "value": 23.31713811585875 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -31592,7 +31592,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.49480446631371 + "value": 56.90869106895323 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -31611,32 +31611,32 @@ "time" ], "times": { - "compilation": 30148.364, - "data": 60092.571, - "framework": 207440.613, - "kernel_overhead": 37797.212, - "profiling_overhead": 49782.627, - "profiling_runs": 59768.203, + "compilation_time": 23908.53, + "data": 77494.562, + "framework": 359527.917, + "kernel_overhead": 93523.173, + "profiling_overhead": 64515.821, + "profiling_runs": 123994.361, "runtimes": [ - 1844.992 + 3655.872 ], - "search_algorithm": 44.891, - "validation": 28.315 + "search_algorithm": 47.967, + "validation": 27.149 }, - "timestamp": "2026-03-02 14:28:30 UTC" + "timestamp": "2026-03-05 08:59:39 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 64, - "z": 16 + "x": 16, + "y": 32, + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 4, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, @@ -31648,10 +31648,10 @@ "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -31659,61 +31659,61 @@ { "name": "time", "unit": "", - "value": 1828.512 + "value": 2197.824 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 26.622200491936677 + "value": 29.24564779962359 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2972.0 + "value": 152.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839608.0 + "value": 1869548.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3868634097717 + "value": 2.8108240107389557 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 33687.0 + "value": 38219.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101367.0 + "value": 2098982.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.450273033319613 + "value": 37.92505617563906 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.13408713699737784 + "value": 0.4441536353015392 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -31743,13 +31743,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.0929713527553 + "value": 94.7849438996359 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90058893639264 + "value": 99.89092078022193 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -31761,7 +31761,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4563402752.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -31773,43 +31773,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 209715200.0 + "value": 591396864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 71303168.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 558891008.0 + "value": 146800640.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 194805760.0 + "value": 228327424.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.782603567084564 + "value": 43.24111685991236 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -31821,13 +31821,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.6279071339196 + "value": 75.88499518547295 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.1517759916572 + "value": 19.48999388064393 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -31839,7 +31839,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.49689866677261 + "value": 64.54775036729394 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -31858,31 +31858,31 @@ "time" ], "times": { - "compilation": 30704.32, - "data": 60330.934, - "framework": 209121.563, - "kernel_overhead": 38366.922, - "profiling_overhead": 49953.125, - "profiling_runs": 60470.582, + "compilation_time": 24033.066, + "data": 79707.738, + "framework": 234429.956, + "kernel_overhead": 33278.098, + "profiling_overhead": 65145.16, + "profiling_runs": 56298.96, "runtimes": [ - 1828.512 + 2197.824 ], - "search_algorithm": 42.955, - "validation": 29.95 + "search_algorithm": 33.244, + "validation": 23.967 }, - "timestamp": "2026-03-02 14:28:30 UTC" + "timestamp": "2026-03-05 08:59:39 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 32, - "z": 16 + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 8, "z": 1 }, @@ -31895,10 +31895,10 @@ "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -31906,61 +31906,61 @@ { "name": "time", "unit": "", - "value": 1805.824 + "value": 1968.736 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.0876660957615 + "value": 32.87902373780515 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5048.0 + "value": 476.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837156.0 + "value": 1871724.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4424506763410343 + "value": 3.162290341690338 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 33513.0 + "value": 34083.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100061.0 + "value": 2099167.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.651104976097383 + "value": 21.537184734534222 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.136445220042346 + "value": 0.25218463042627676 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -31990,13 +31990,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.01774670470641 + "value": 97.76032997904697 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89643597656713 + "value": 99.89631702027687 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -32008,7 +32008,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4563402752.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -32020,43 +32020,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 209715200.0 + "value": 564133888.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 69206016.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 558891008.0 + "value": 115343360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 194805760.0 + "value": 192282624.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.44908709782345 + "value": 43.630435741916685 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -32068,13 +32068,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.24316992726122 + "value": 86.168362444597 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.260222590331257 + "value": 11.233863658548533 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -32086,7 +32086,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 67.66914636075914 + "value": 61.724280012127075 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -32105,37 +32105,37 @@ "time" ], "times": { - "compilation": 31641.172, - "data": 60361.83, - "framework": 207764.54700000002, - "kernel_overhead": 37955.322, - "profiling_overhead": 50198.386, - "profiling_runs": 59249.009, + "compilation_time": 24292.315, + "data": 77453.765, + "framework": 229225.332, + "kernel_overhead": 32747.389, + "profiling_overhead": 64498.081, + "profiling_runs": 54526.097, "runtimes": [ - 1805.824 + 1968.736 ], - "search_algorithm": 43.879, - "validation": 28.031 + "search_algorithm": 34.994, + "validation": 23.032 }, - "timestamp": "2026-03-02 14:28:31 UTC" + "timestamp": "2026-03-05 08:59:39 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 64, - "z": 8 + "y": 32, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 40 }, "configuration": { "INNER_UNROLL_FACTOR": "0", @@ -32143,9 +32143,9 @@ "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -32153,61 +32153,61 @@ { "name": "time", "unit": "", - "value": 1731.008 + "value": 1841.408 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.368289565419474 + "value": 35.14250036064628 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5280.0 + "value": 476.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869696.0 + "value": 1870432.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5243203511586136 + "value": 3.3761854452145563 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 33399.0 + "value": 30368.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101524.0 + "value": 2099167.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.930921872357288 + "value": 11.452016912643977 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06942355623522745 + "value": 0.1340868151020513 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -32237,13 +32237,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.71821866563016 + "value": 98.13247514005076 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.87840107911596 + "value": 99.88895731024118 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -32255,7 +32255,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4429185024.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -32267,43 +32267,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 138412032.0 + "value": 209715200.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 67108864.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 283639808.0 + "value": 558891008.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 165953536.0 + "value": 194805760.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.640052855807525 + "value": 37.786602839445685 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -32315,13 +32315,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.90169488337604 + "value": 91.63835680591511 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.371141749397269 + "value": 6.152477568756507 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -32333,7 +32333,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.67191342912649 + "value": 66.50443892667478 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -32352,19 +32352,19 @@ "time" ], "times": { - "compilation": 117306.395, - "data": 60327.551, - "framework": 183504.32499999998, - "kernel_overhead": 26065.145, - "profiling_overhead": 49733.496, - "profiling_runs": 47378.133, + "compilation_time": 23331.835, + "data": 78515.535, + "framework": 236532.50900000002, + "kernel_overhead": 35924.33, + "profiling_overhead": 64574.527, + "profiling_runs": 57518.117, "runtimes": [ - 1731.008 + 1841.408 ], - "search_algorithm": 38.55, - "validation": 30.29 + "search_algorithm": 34.155, + "validation": 26.526 }, - "timestamp": "2026-03-02 14:28:31 UTC" + "timestamp": "2026-03-05 08:59:39 UTC" }, { "compilation_data": { @@ -32400,49 +32400,49 @@ { "name": "time", "unit": "", - "value": 1764.256 + "value": 1788.96 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.937080938697317 + "value": 36.09972508420844 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 252.0 + "value": 176.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1866392.0 + "value": 1865504.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4911439174028835 + "value": 3.4758003920287486 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 28142.0 + "value": 28719.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2097279.0 + "value": 2098886.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.930298228550079 + "value": 5.931386357111656 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -32454,7 +32454,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06944458572981581 + "value": 0.06946359385233628 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -32484,13 +32484,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.66753485015992 + "value": 81.69054563092972 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92222346270778 + "value": 99.93016649046385 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -32550,7 +32550,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.636609774164526 + "value": 37.64159308588437 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -32562,13 +32562,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.88880894631812 + "value": 94.90723719425968 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3706840092014856 + "value": 3.3713386259191362 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -32580,7 +32580,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.663946842392235 + "value": 58.67533990095885 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -32599,47 +32599,47 @@ "time" ], "times": { - "compilation": 30536.323, - "data": 59981.885, - "framework": 184721.823, - "kernel_overhead": 26475.882, - "profiling_overhead": 49960.297, - "profiling_runs": 48303.759, + "compilation_time": 24107.009, + "data": 79605.99, + "framework": 212635.928, + "kernel_overhead": 23391.2, + "profiling_overhead": 65211.905, + "profiling_runs": 44426.833, "runtimes": [ - 1764.256 + 1788.96 ], - "search_algorithm": 40.293, - "validation": 28.624 + "search_algorithm": 36.408, + "validation": 28.284 }, - "timestamp": "2026-03-02 14:28:31 UTC" + "timestamp": "2026-03-05 08:59:39 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 8 + "x": 16, + "y": 32, + "z": 128 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -32647,61 +32647,61 @@ { "name": "time", "unit": "", - "value": 1765.28 + "value": 6371.104 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.48230092734519 + "value": 9.80612381334403 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 196.0 + "value": 4120.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1835780.0 + "value": 1873384.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4968266214622408 + "value": 1.0052188760079641 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 30051.0 + "value": 114493.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099162.0 + "value": 2108063.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.930723117251725 + "value": 26.28562253711572 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06944810235094939 + "value": 0.30797007918299707 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -32731,13 +32731,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.70632592116904 + "value": 97.46194986627934 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91519269361653 + "value": 99.97126927209614 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -32749,7 +32749,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4429185024.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -32761,43 +32761,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 138412032.0 + "value": 10880024576.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 283639808.0 + "value": 2248146944.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 165953536.0 + "value": 816054272.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.63552430459736 + "value": 43.025850070858766 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -32809,13 +32809,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.90029145743134 + "value": 26.287666061424137 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3710918962539695 + "value": 13.400548519593164 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -32827,7 +32827,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.67104577670327 + "value": 79.91587590977844 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -32846,47 +32846,47 @@ "time" ], "times": { - "compilation": 30140.925, - "data": 59674.57, - "framework": 181421.825, - "kernel_overhead": 25526.685, - "profiling_overhead": 49335.834, - "profiling_runs": 46884.736, + "compilation_time": 25042.663, + "data": 78994.417, + "framework": 2192373.795, + "kernel_overhead": 1001216.611, + "profiling_overhead": 65534.524, + "profiling_runs": 1046628.243, "runtimes": [ - 1765.28 + 6371.104 ], - "search_algorithm": 43.845, - "validation": 31.409 + "search_algorithm": 60.624, + "validation": 30.401 }, - "timestamp": "2026-03-02 14:28:31 UTC" + "timestamp": "2026-03-05 08:59:40 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 64, - "z": 8 + "x": 16, + "y": 32, + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 4, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 25 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -32894,61 +32894,61 @@ { "name": "time", "unit": "", - "value": 1755.008 + "value": 6182.72 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.64104941244462 + "value": 10.344568518284197 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 268.0 + "value": 11580.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836336.0 + "value": 1872012.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.492123491174811 + "value": 1.0271626218029224 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 28241.0 + "value": 114214.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098524.0 + "value": 2101611.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.9296535068530885 + "value": 13.384725913150739 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06943361427728693 + "value": 0.15681296534906278 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -32978,13 +32978,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.66473019100874 + "value": 98.53639400559108 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90422647744198 + "value": 99.97048999377782 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -32996,7 +32996,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4429185024.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -33008,43 +33008,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 138412032.0 + "value": 10854858752.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 283639808.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 165953536.0 + "value": 837287936.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.636692731024105 + "value": 36.234851205628885 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -33056,13 +33056,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.89090839214354 + "value": 26.770646105571515 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.37075858668381 + "value": 6.875663990005185 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -33074,7 +33074,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.665244801336705 + "value": 83.50178769640068 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -33093,47 +33093,47 @@ "time" ], "times": { - "compilation": 34962.122, - "data": 60928.002, - "framework": 185682.45299999998, - "kernel_overhead": 26226.474, - "profiling_overhead": 50696.109, - "profiling_runs": 47831.868, + "compilation_time": 23303.694, + "data": 78629.996, + "framework": 2769953.807, + "kernel_overhead": 1291172.967, + "profiling_overhead": 65370.55, + "profiling_runs": 1334780.294, "runtimes": [ - 1755.008 + 6182.72 ], - "search_algorithm": 46.789, - "validation": 35.451 + "search_algorithm": 46.017, + "validation": 27.693 }, - "timestamp": "2026-03-02 14:28:31 UTC" + "timestamp": "2026-03-05 08:59:42 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 32, - "z": 8 + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 48 + "private_memory_size": 32, + "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -33141,61 +33141,61 @@ { "name": "time", "unit": "", - "value": 1757.824 + "value": 8192.416 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 27.54344197214357 + "value": 7.804129292473163 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4104.0 + "value": 13408.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838636.0 + "value": 1933264.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5002593192059854 + "value": 48.82638483225434 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 32296.0 + "value": 926990.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099549.0 + "value": 138424519.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.960023644544943 + "value": 5.143515509972154 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06975992435408203 + "value": 0.059432383016558114 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -33213,25 +33213,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.22938882777015 + "value": 97.76851472130176 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.82424407730409 + "value": 98.0172613032649 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -33243,7 +33243,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4429185024.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -33255,43 +33255,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 138412032.0 + "value": 4250927104.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 2722103296.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 283639808.0 + "value": 289406976.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 165953536.0 + "value": 420413440.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.839212284150626 + "value": 12.440640986841089 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -33303,13 +33303,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.41324447964932 + "value": 20.696613161042734 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3893132499484806 + "value": 13.147604356697556 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -33321,7 +33321,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.98817325635226 + "value": 32.414600206754365 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -33340,37 +33340,37 @@ "time" ], "times": { - "compilation": 32434.692, - "data": 60505.393, - "framework": 182748.648, - "kernel_overhead": 25833.474, - "profiling_overhead": 49619.448, - "profiling_runs": 46790.333, + "compilation_time": 23659.143, + "data": 77654.39, + "framework": 953822.304, + "kernel_overhead": 378143.405, + "profiling_overhead": 64397.213, + "profiling_runs": 433627.296, "runtimes": [ - 1757.824 + 8192.416 ], - "search_algorithm": 43.547, - "validation": 27.45 + "search_algorithm": 44.966, + "validation": 28.464 }, - "timestamp": "2026-03-02 14:28:31 UTC" + "timestamp": "2026-03-05 08:59:42 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 64, - "z": 128 + "y": 32, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 22 + "private_memory_size": 64, + "registers": 34 }, "configuration": { "INNER_UNROLL_FACTOR": "1", @@ -33378,9 +33378,9 @@ "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -33388,61 +33388,61 @@ { "name": "time", "unit": "", - "value": 6180.544 + "value": 8160.832 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.399205593754115 + "value": 7.903214714414323 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 52.0 + "value": 3244.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870524.0 + "value": 2019372.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9981783604196638 + "value": 54.39060288792162 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 104573.0 + "value": 18257687.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099048.0 + "value": 138422731.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.285176122356464 + "value": 2.5813156027406032 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3079667216493864 + "value": 0.029615866449795362 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -33460,25 +33460,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.2203318165304 + "value": 90.06993871647624 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97547301904629 + "value": 97.8810767778805 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -33490,7 +33490,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6979321856.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -33502,7 +33502,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10880024576.0 + "value": 3769630720.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -33514,31 +33514,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2164260864.0 + "value": 2453667840.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2248146944.0 + "value": 144703488.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 816054272.0 + "value": 379617280.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.02314933298777 + "value": 10.142650026234906 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -33550,13 +33550,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.286274143533596 + "value": 20.655437695691393 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.399838967699743 + "value": 11.815354131104721 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -33568,7 +33568,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.91163980436497 + "value": 29.21095339406554 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -33587,19 +33587,19 @@ "time" ], "times": { - "compilation": 77085.897, - "data": 61252.1, - "framework": 2174059.477, - "kernel_overhead": 1008046.814, - "profiling_overhead": 50967.095, - "profiling_runs": 1053793.468, + "compilation_time": 23637.008, + "data": 77822.11, + "framework": 921351.403, + "kernel_overhead": 362018.064, + "profiling_overhead": 64434.214, + "profiling_runs": 417077.015, "runtimes": [ - 6180.544 + 8160.832 ], - "search_algorithm": 31.118, - "validation": 31.169 + "search_algorithm": 45.585, + "validation": 29.084 }, - "timestamp": "2026-03-02 14:28:33 UTC" + "timestamp": "2026-03-05 08:59:43 UTC" }, { "compilation_data": { @@ -33607,7 +33607,7 @@ "global_size": { "x": 16, "y": 32, - "z": 128 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -33616,8 +33616,8 @@ "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 22 + "private_memory_size": 128, + "registers": 32 }, "configuration": { "INNER_UNROLL_FACTOR": "1", @@ -33627,7 +33627,7 @@ "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -33635,61 +33635,61 @@ { "name": "time", "unit": "", - "value": 6324.0 + "value": 9528.736 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.405938952399276 + "value": 7.409399355838025 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 432.0 + "value": 17908.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871960.0 + "value": 2296556.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9970017452403507 + "value": 76.80966690322954 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 105103.0 + "value": 120883884.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099167.0 + "value": 138417824.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.284200157662436 + "value": 1.0926345259812185 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.30794646222443206 + "value": 0.012761413473597551 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -33707,25 +33707,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.57323694036057 + "value": 95.07571537404945 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96670650359597 + "value": 97.99409582633444 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -33737,7 +33737,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6979321856.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -33749,7 +33749,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10880024576.0 + "value": 3528982528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -33761,31 +33761,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2164260864.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2248146944.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 816054272.0 + "value": 359219200.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.02408799604398 + "value": 7.750469989037202 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -33797,13 +33797,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.286849921251466 + "value": 17.78023772659733 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.400132479387954 + "value": 9.608534230425583 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -33815,7 +33815,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.91340782526372 + "value": 23.793744680633978 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -33834,47 +33834,47 @@ "time" ], "times": { - "compilation": 22365.997, - "data": 62681.924, - "framework": 2168850.27, - "kernel_overhead": 1004207.878, - "profiling_overhead": 52704.575, - "profiling_runs": 1049255.893, + "compilation_time": 23366.86, + "data": 80774.904, + "framework": 916249.8180000001, + "kernel_overhead": 351879.346, + "profiling_overhead": 66747.15, + "profiling_runs": 416848.418, "runtimes": [ - 6324.0 + 9528.736 ], - "search_algorithm": 32.056, - "validation": 21.436 + "search_algorithm": 45.105, + "validation": 33.43 }, - "timestamp": "2026-03-02 14:28:34 UTC" + "timestamp": "2026-03-05 08:59:44 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 128 + "x": 16, + "y": 32, + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 22 + "registers": 26 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -33882,61 +33882,61 @@ { "name": "time", "unit": "", - "value": 6399.52 + "value": 3981.248 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.255536521616275 + "value": 16.085597796264185 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 856.0 + "value": 6156.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839696.0 + "value": 1869924.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.001737794154545 + "value": 1.5726548030608558 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 108846.0 + "value": 72707.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103458.0 + "value": 2100509.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.28472107494712 + "value": 20.896618161860626 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3079475523065852 + "value": 0.2448150140457833 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -33966,13 +33966,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.92650124592967 + "value": 98.42068166385242 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96685694654576 + "value": 99.96517318151933 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -33984,7 +33984,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6979321856.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -33996,43 +33996,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10880024576.0 + "value": 5221908480.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2164260864.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2248146944.0 + "value": 2193620992.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 816054272.0 + "value": 535298048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.024290022067085 + "value": 39.61348382156134 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -34044,13 +34044,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.286903412611444 + "value": 41.79631872520104 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.400159747444507 + "value": 10.734796703835814 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -34062,7 +34062,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.91355320677756 + "value": 83.34841414384464 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -34081,47 +34081,47 @@ "time" ], "times": { - "compilation": 20685.208, - "data": 64288.48, - "framework": 2177427.641, - "kernel_overhead": 1006927.229, - "profiling_overhead": 54241.152, - "profiling_runs": 1051970.78, + "compilation_time": 23496.392, + "data": 78419.608, + "framework": 1406853.398, + "kernel_overhead": 615962.234, + "profiling_overhead": 64140.026, + "profiling_runs": 648331.53, "runtimes": [ - 6399.52 + 3981.248 ], - "search_algorithm": 35.638, - "validation": 24.978 + "search_algorithm": 45.286, + "validation": 28.348 }, - "timestamp": "2026-03-02 14:28:35 UTC" + "timestamp": "2026-03-05 08:59:44 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 64, - "z": 128 + "x": 16, + "y": 32, + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 4, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 22 + "registers": 30 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -34129,61 +34129,61 @@ { "name": "time", "unit": "", - "value": 6322.656 + "value": 4196.704 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.589711935766606 + "value": 15.179710678644783 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 19076.0 + "value": 444.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1843024.0 + "value": 1872572.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0068492232907522 + "value": 1.475595775622297 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 128363.0 + "value": 71327.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2107323.0 + "value": 2099158.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.284597329485138 + "value": 9.819050462580064 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3079411469786517 + "value": 0.11503467539986556 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -34213,13 +34213,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.62706740611283 + "value": 98.83762941254982 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96556732339205 + "value": 99.96514096756164 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -34231,7 +34231,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6979321856.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -34243,43 +34243,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10880024576.0 + "value": 5863636992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2164260864.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2248146944.0 + "value": 1373634560.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 816054272.0 + "value": 621477888.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.02357381700273 + "value": 43.268291413752344 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -34291,13 +34291,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.286695755118554 + "value": 39.27886143420288 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.400053890792856 + "value": 5.120828126431723 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -34309,7 +34309,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.91293915210245 + "value": 90.93838081665545 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -34328,47 +34328,47 @@ "time" ], "times": { - "compilation": 22656.135, - "data": 61528.757, - "framework": 2170466.767, - "kernel_overhead": 1006208.213, - "profiling_overhead": 51750.338, - "profiling_runs": 1050979.459, + "compilation_time": 22912.046, + "data": 82830.595, + "framework": 1923826.2880000002, + "kernel_overhead": 869312.971, + "profiling_overhead": 69202.54, + "profiling_runs": 902480.182, "runtimes": [ - 6322.656 + 4196.704 ], - "search_algorithm": 26.114, - "validation": 23.448 + "search_algorithm": 48.857, + "validation": 31.264 }, - "timestamp": "2026-03-02 14:28:36 UTC" + "timestamp": "2026-03-05 08:59:45 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 32, - "z": 128 + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 22 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -34376,61 +34376,61 @@ { "name": "time", "unit": "", - "value": 6448.864 + "value": 6848.352 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.275828897703929 + "value": 9.395725791953979 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1136.0 + "value": 1256.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839516.0 + "value": 1871924.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9976251007100965 + "value": 0.9368606445392504 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 106946.0 + "value": 114090.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103557.0 + "value": 2103030.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.22199167630891 + "value": 3.041182419994222 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 67108864.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3071864741955581 + "value": 0.03561599557026055 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -34460,13 +34460,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.48318972789014 + "value": 98.81070698634723 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9578961898209 + "value": 99.93100478714572 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -34478,7 +34478,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 6979321856.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -34490,43 +34490,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10880024576.0 + "value": 8738832384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2164260864.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2248146944.0 + "value": 1157627904.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 25165824.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 816054272.0 + "value": 970653696.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.9214955795746 + "value": 36.47814570904168 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -34538,13 +34538,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.224287222133785 + "value": 24.33063995278877 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.368240165970544 + "value": 1.6335268522990505 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -34556,7 +34556,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.72322916599319 + "value": 87.9791211259898 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -34575,47 +34575,47 @@ "time" ], "times": { - "compilation": 17226.48, - "data": 66995.925, - "framework": 2177434.74, - "kernel_overhead": 1004065.802, - "profiling_overhead": 57075.527, - "profiling_runs": 1049297.486, + "compilation_time": 24765.465, + "data": 78507.175, + "framework": 3142472.9299999997, + "kernel_overhead": 1476527.433, + "profiling_overhead": 65013.92, + "profiling_runs": 1522424.402, "runtimes": [ - 6448.864 + 6848.352 ], - "search_algorithm": 25.067, - "validation": 19.946 + "search_algorithm": 44.8, + "validation": 27.653 }, - "timestamp": "2026-03-02 14:28:37 UTC" + "timestamp": "2026-03-05 08:59:47 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 64, - "z": 64 + "y": 32, + "z": 8 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 25 + "private_memory_size": 128, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -34623,61 +34623,61 @@ { "name": "time", "unit": "", - "value": 6190.688 + "value": 9381.056 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.9429364577250015 + "value": 8.304740008090986 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6836.0 + "value": 28736.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1873360.0 + "value": 2283580.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0263728912242345 + "value": 81.82681012099506 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 110238.0 + "value": 133095550.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103946.0 + "value": 138425315.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.383709423330373 + "value": 1.1136555259985612 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.15680329031695978 + "value": 0.012744580193030606 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -34695,25 +34695,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.86851235990034 + "value": 98.66069475091538 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97023869434464 + "value": 97.28106709484224 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -34725,7 +34725,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -34737,43 +34737,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10854858752.0 + "value": 1918369792.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 1245708288.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1392508928.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 837287936.0 + "value": 258555904.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.23178214299484 + "value": 7.79437417500793 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -34785,13 +34785,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.769061703035646 + "value": 17.88693388809159 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.875257058494507 + "value": 5.194459926729723 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -34803,7 +34803,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.49684240887731 + "value": 17.22891959622547 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -34822,19 +34822,19 @@ "time" ], "times": { - "compilation": 58363.516, - "data": 63899.044, - "framework": 2756193.406, - "kernel_overhead": 1297583.963, - "profiling_overhead": 54000.23, - "profiling_runs": 1340710.169, + "compilation_time": 23525.954, + "data": 78876.863, + "framework": 588241.0009999999, + "kernel_overhead": 190636.648, + "profiling_overhead": 65613.933, + "profiling_runs": 253113.557, "runtimes": [ - 6190.688 + 9381.056 ], - "search_algorithm": 25.989, - "validation": 20.236 + "search_algorithm": 39.339, + "validation": 26.655 }, - "timestamp": "2026-03-02 14:28:39 UTC" + "timestamp": "2026-03-05 08:59:47 UTC" }, { "compilation_data": { @@ -34842,7 +34842,7 @@ "global_size": { "x": 16, "y": 32, - "z": 64 + "z": 32 }, "local_memory_size": 0, "local_size": { @@ -34852,17 +34852,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 25 + "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -34870,61 +34870,61 @@ { "name": "time", "unit": "", - "value": 6239.808 + "value": 2818.336 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.8408930905497165 + "value": 22.871357727003154 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12228.0 + "value": 5704.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1876672.0 + "value": 1870084.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.01942718507223 + "value": 2.216115674587332 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 117160.0 + "value": 52990.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105444.0 + "value": 2100509.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.383295824504549 + "value": 14.860375956736766 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.15681113930796367 + "value": 0.17406985317814183 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -34954,13 +34954,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.52955473846026 + "value": 98.65697477473694 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97337788587753 + "value": 99.95011698328501 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -34972,7 +34972,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -34984,43 +34984,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10854858752.0 + "value": 3042967552.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1392508928.0 + "value": 1644167168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 837287936.0 + "value": 399179776.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.23333460408794 + "value": 45.94805106011678 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -35032,13 +35032,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.76956106498927 + "value": 59.44549642505709 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.875385312590017 + "value": 7.749974387446408 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -35050,7 +35050,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.49841834530764 + "value": 88.39983990545713 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -35069,47 +35069,47 @@ "time" ], "times": { - "compilation": 17146.328, - "data": 62149.082, - "framework": 2753345.402, - "kernel_overhead": 1297373.906, - "profiling_overhead": 52322.688, - "profiling_runs": 1341499.726, + "compilation_time": 24080.61, + "data": 78081.335, + "framework": 1189883.438, + "kernel_overhead": 510516.2, + "profiling_overhead": 64542.718, + "profiling_runs": 536743.185, "runtimes": [ - 6239.808 + 2818.336 ], - "search_algorithm": 26.309, - "validation": 15.361 + "search_algorithm": 44.337, + "validation": 27.077 }, - "timestamp": "2026-03-02 14:28:40 UTC" + "timestamp": "2026-03-05 08:59:48 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 128, - "z": 64 + "x": 16, + "y": 32, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 2, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 25 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -35117,61 +35117,61 @@ { "name": "time", "unit": "", - "value": 6240.8 + "value": 3460.16 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.671476368331014 + "value": 18.543733035691638 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9264.0 + "value": 2608.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838980.0 + "value": 1868784.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0154630356349206 + "value": 1.8044140573676066 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 111506.0 + "value": 57885.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102818.0 + "value": 2099618.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.383781650940765 + "value": 6.037738847581255 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1567976379293333 + "value": 0.07073389641076132 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -35201,13 +35201,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.77267913476544 + "value": 98.63925371428081 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96659393179232 + "value": 99.96874307800208 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -35219,7 +35219,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -35231,43 +35231,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10854858752.0 + "value": 3301965824.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1392508928.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 837287936.0 + "value": 513540096.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.23236943660888 + "value": 42.2236017323822 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -35279,13 +35279,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.769072701291375 + "value": 48.30277124260986 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.875259883241827 + "value": 3.2429839091107695 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -35297,7 +35297,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.49687711302771 + "value": 92.40809178755784 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -35316,47 +35316,47 @@ "time" ], "times": { - "compilation": 17254.464, - "data": 66885.389, - "framework": 2762605.827, - "kernel_overhead": 1297943.976, - "profiling_overhead": 56353.573, - "profiling_runs": 1341422.889, + "compilation_time": 23735.345, + "data": 77464.301, + "framework": 1864602.94, + "kernel_overhead": 846684.464, + "profiling_overhead": 64346.101, + "profiling_runs": 876108.074, "runtimes": [ - 6240.8 + 3460.16 ], - "search_algorithm": 23.8, - "validation": 19.089 + "search_algorithm": 46.294, + "validation": 31.036 }, - "timestamp": "2026-03-02 14:28:42 UTC" + "timestamp": "2026-03-05 08:59:49 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, - "y": 64, - "z": 64 + "x": 16, + "y": 32, + "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 32, - "y": 4, + "x": 16, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 25 + "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -35364,61 +35364,61 @@ { "name": "time", "unit": "", - "value": 6278.4 + "value": 5617.728 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.813860799303271 + "value": 11.467687386096356 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 18184.0 + "value": 15964.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842260.0 + "value": 1868744.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0267835816582325 + "value": 1.1374058573694177 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 120980.0 + "value": 108866.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2107015.0 + "value": 2106234.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.383471766363472 + "value": 1.8629439943995987 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.15680471098111826 + "value": 0.021828306865919127 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -35448,13 +35448,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.58929895016895 + "value": 73.8672609653546 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97091384986095 + "value": 99.98281451609408 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -35466,7 +35466,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -35478,43 +35478,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10854858752.0 + "value": 4806148096.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1392508928.0 + "value": 1222115328.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 837287936.0 + "value": 868564992.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.23222970282393 + "value": 42.34509806818282 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -35526,13 +35526,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.76912344821457 + "value": 29.808037629777782 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.875272916875422 + "value": 1.0588548523273116 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -35544,7 +35544,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.49705335062431 + "value": 96.44913969231415 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -35563,47 +35563,47 @@ "time" ], "times": { - "compilation": 15888.808, - "data": 61455.533, - "framework": 2751476.562, - "kernel_overhead": 1297411.834, - "profiling_overhead": 51651.744, - "profiling_runs": 1340957.451, + "compilation_time": 23380.559, + "data": 77164.129, + "framework": 3331831.443, + "kernel_overhead": 1575300.664, + "profiling_overhead": 64023.778, + "profiling_runs": 1615342.872, "runtimes": [ - 6278.4 + 5617.728 ], - "search_algorithm": 24.981, - "validation": 20.65 + "search_algorithm": 46.751, + "validation": 30.198 }, - "timestamp": "2026-03-02 14:28:43 UTC" + "timestamp": "2026-03-05 08:59:51 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 8, + "x": 16, "y": 32, - "z": 64 + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 32, + "x": 16, "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 25 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -35611,61 +35611,61 @@ { "name": "time", "unit": "", - "value": 6343.968 + "value": 3368.0 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.720477684463746 + "value": 19.185887750830652 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 14444.0 + "value": 7208.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842384.0 + "value": 1870148.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0233861854047352 + "value": 1.8640471010703108 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 117946.0 + "value": 60682.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109341.0 + "value": 2100454.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.377582364623292 + "value": 6.2271739728338815 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.15673282467868527 + "value": 0.07294943692153376 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -35695,13 +35695,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.11190322949062 + "value": 98.68448483285528 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9705158371198 + "value": 99.96018313852841 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -35713,7 +35713,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -35725,43 +35725,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 10854858752.0 + "value": 2563768320.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1392508928.0 + "value": 1362100224.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 837287936.0 + "value": 484179968.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.216003633880455 + "value": 42.77189124935575 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -35773,13 +35773,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.75695781018483 + "value": 49.81998569312231 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.872148343826767 + "value": 3.344847672267733 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -35791,7 +35791,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.45913215432238 + "value": 89.86161422140057 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -35810,47 +35810,47 @@ "time" ], "times": { - "compilation": 16524.438, - "data": 60943.771, - "framework": 2741024.51, - "kernel_overhead": 1292675.638, - "profiling_overhead": 51229.376, - "profiling_runs": 1336175.725, + "compilation_time": 23737.275, + "data": 76243.924, + "framework": 1737459.703, + "kernel_overhead": 784567.889, + "profiling_overhead": 63235.061, + "profiling_runs": 813412.829, "runtimes": [ - 6343.968 + 3368.0 ], - "search_algorithm": 25.351, - "validation": 17.333 + "search_algorithm": 47.201, + "validation": 26.52 }, - "timestamp": "2026-03-02 14:28:44 UTC" + "timestamp": "2026-03-05 08:59:52 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 16, - "y": 64, - "z": 32 + "y": 32, + "z": 8 }, "local_memory_size": 0, "local_size": { "x": 16, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 32, - "registers": 27 + "private_memory_size": 0, + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -35858,61 +35858,61 @@ { "name": "time", "unit": "", - "value": 8130.816 + "value": 6283.712 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.129069302975833 + "value": 10.29594977525894 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 23400.0 + "value": 17540.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1940648.0 + "value": 1870668.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.84507229292939 + "value": 1.0246750835882101 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 883544.0 + "value": 120321.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138429299.0 + "value": 2106523.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.159954563704993 + "value": 1.6607121937831946 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05963846220710941 + "value": 0.019457398889693174 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -35930,25 +35930,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.26924897031162 + "value": 81.99982124551232 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.02114560957686 + "value": 99.97432925336001 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -35960,7 +35960,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -35972,43 +35972,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4250927104.0 + "value": 7219970048.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2722103296.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 289406976.0 + "value": 551026688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 420413440.0 + "value": 952385536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.356358162282225 + "value": 43.24541028159244 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -36020,13 +36020,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.55782628519487 + "value": 26.5726566834338 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.059439451678967 + "value": 0.9439261590428756 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -36038,7 +36038,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 32.19723358932365 + "value": 94.27779125807814 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -36057,19 +36057,19 @@ "time" ], "times": { - "compilation": 58757.048, - "data": 62066.507, - "framework": 923555.274, - "kernel_overhead": 377228.799, - "profiling_overhead": 52303.156, - "profiling_runs": 431956.812, + "compilation_time": 24185.16, + "data": 78146.607, + "framework": 3689080.967, + "kernel_overhead": 1751505.842, + "profiling_overhead": 64634.139, + "profiling_runs": 1794794.379, "runtimes": [ - 8130.816 + 6283.712 ], - "search_algorithm": 26.471, - "validation": 16.782 + "search_algorithm": 46.915, + "validation": 29.54 }, - "timestamp": "2026-03-02 14:28:45 UTC" + "timestamp": "2026-03-05 08:59:54 UTC" }, { "compilation_data": { @@ -36077,7 +36077,7 @@ "global_size": { "x": 16, "y": 32, - "z": 32 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -36086,18 +36086,18 @@ "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 32, - "registers": 27 + "private_memory_size": 0, + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "16", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -36105,61 +36105,61 @@ { "name": "time", "unit": "", - "value": 8124.256 + "value": 5985.952 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.2003226108098 + "value": 10.797281811514486 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 19644.0 + "value": 13160.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1932412.0 + "value": 1871268.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.81044085703491 + "value": 1.0721199205383505 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 922403.0 + "value": 111822.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138421827.0 + "value": 2109269.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.10318603920842 + "value": 1.7422597444152297 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.059565077501131575 + "value": 0.02041221036895768 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -36177,25 +36177,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.83780089917505 + "value": 81.98707983621536 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.7403364441641 + "value": 99.96936569587847 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -36207,7 +36207,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -36219,43 +36219,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4250927104.0 + "value": 5944901632.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 101187584.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2722103296.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 289406976.0 + "value": 1054343168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 420413440.0 + "value": 913571840.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.376813518752316 + "value": 44.06313842560343 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -36267,13 +36267,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.59092280409304 + "value": 27.878011458563474 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.080464144592307 + "value": 0.9902955730519982 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -36285,7 +36285,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 32.24908867918433 + "value": 94.98874312246363 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -36304,19 +36304,19 @@ "time" ], "times": { - "compilation": 15994.163, - "data": 63642.063, - "framework": 935007.602, - "kernel_overhead": 381444.325, - "profiling_overhead": 53753.685, - "profiling_runs": 436167.529, + "compilation_time": 24149.084, + "data": 79213.128, + "framework": 3610749.3710000003, + "kernel_overhead": 1711734.6, + "profiling_overhead": 66028.534, + "profiling_runs": 1753773.109, "runtimes": [ - 8124.256 + 5985.952 ], - "search_algorithm": 28.428, - "validation": 18.225 + "search_algorithm": 48.258, + "validation": 31.185 }, - "timestamp": "2026-03-02 14:28:46 UTC" + "timestamp": "2026-03-05 08:59:56 UTC" }, { "compilation_data": { @@ -36324,7 +36324,7 @@ "global_size": { "x": 8, "y": 128, - "z": 32 + "z": 256 }, "local_memory_size": 0, "local_size": { @@ -36333,18 +36333,18 @@ "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 32, - "registers": 27 + "private_memory_size": 0, + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "1" }, "correctness": 1, "invalidity": "correct", @@ -36352,61 +36352,61 @@ { "name": "time", "unit": "", - "value": 8277.6 + "value": 6976.736 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.947119754208488 + "value": 8.919748393478073 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12660.0 + "value": 14832.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1913712.0 + "value": 1843604.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.734670222301155 + "value": 0.9290804387164939 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 673182.0 + "value": 133282.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138424438.0 + "value": 2114657.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.177296595378048 + "value": 47.98223235977743 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 134217728.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05989702566520283 + "value": 0.5622404444389919 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -36424,25 +36424,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.29228283916612 + "value": 65.30649118490541 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.5858572352346 + "value": 99.95456084202118 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -36454,7 +36454,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -36466,43 +36466,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4250927104.0 + "value": 2952790016.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2722103296.0 + "value": 4311744512.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 289406976.0 + "value": 318767104.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 50331648.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 420413440.0 + "value": 527958016.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.465168870524554 + "value": 21.375209390200993 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -36514,13 +36514,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.738118022621308 + "value": 23.999830950497238 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.17397048214371 + "value": 24.374828309098753 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -36532,7 +36532,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 32.479603320892366 + "value": 47.20313458261315 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -36551,47 +36551,47 @@ "time" ], "times": { - "compilation": 17177.367, - "data": 64184.544, - "framework": 928554.563, - "kernel_overhead": 377422.475, - "profiling_overhead": 54465.22, - "profiling_runs": 432482.324, + "compilation_time": 24158.466, + "data": 77772.106, + "framework": 541980.463, + "kernel_overhead": 175712.419, + "profiling_overhead": 64563.643, + "profiling_runs": 223932.295, "runtimes": [ - 8277.6 + 6976.736 ], - "search_algorithm": 27.536, - "validation": 18.017 + "search_algorithm": 53.542, + "validation": 33.108 }, - "timestamp": "2026-03-02 14:28:46 UTC" + "timestamp": "2026-03-05 08:59:56 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, - "z": 32 + "y": 128, + "z": 128 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 32, - "registers": 27 + "private_memory_size": 0, + "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -36599,61 +36599,61 @@ { "name": "time", "unit": "", - "value": 8272.416 + "value": 3624.288 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.143757356821042 + "value": 17.236768693607218 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 22788.0 + "value": 2452.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1914668.0 + "value": 1836372.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.754066440799235 + "value": 1.7103301266846866 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 692430.0 + "value": 62647.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138419959.0 + "value": 2099170.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.139622148189375 + "value": 45.69463555031057 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05954356571516568 + "value": 0.5353009826484592 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -36671,25 +36671,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.76882272745249 + "value": 75.47143389281624 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.28407741308017 + "value": 99.91055038028239 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -36701,7 +36701,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -36713,43 +36713,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4250927104.0 + "value": 1509949440.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 142606336.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2722103296.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 289406976.0 + "value": 184549376.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 420413440.0 + "value": 333971456.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.43016247404871 + "value": 28.97708426824793 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -36761,13 +36761,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.67904008366368 + "value": 45.71991347474022 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.13644099064768 + "value": 23.306440267396873 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -36779,7 +36779,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 32.387096419234176 + "value": 56.882484923107704 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -36798,47 +36798,47 @@ "time" ], "times": { - "compilation": 17654.453, - "data": 64097.268, - "framework": 935152.174, - "kernel_overhead": 380943.277, - "profiling_overhead": 54290.395, - "profiling_runs": 435821.234, + "compilation_time": 22922.368, + "data": 79448.923, + "framework": 359343.609, + "kernel_overhead": 92599.387, + "profiling_overhead": 64437.211, + "profiling_runs": 122858.088, "runtimes": [ - 8272.416 + 3624.288 ], - "search_algorithm": 26.555, - "validation": 16.453 + "search_algorithm": 48.938, + "validation": 34.116 }, - "timestamp": "2026-03-02 14:28:47 UTC" + "timestamp": "2026-03-05 08:59:56 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 32 + "y": 128, + "z": 64 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 32, - "registers": 27 + "private_memory_size": 0, + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -36846,61 +36846,61 @@ { "name": "time", "unit": "", - "value": 8217.536 + "value": 2228.256 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.873440032256297 + "value": 28.53126361224409 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7872.0 + "value": 6200.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1912632.0 + "value": 1838280.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.81108370121944 + "value": 2.7988896410853266 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 621873.0 + "value": 43516.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138424159.0 + "value": 2100136.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.143402181694774 + "value": 37.92711637824371 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05994123534554356 + "value": 0.4442566460665906 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -36918,25 +36918,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.4738915739795 + "value": 95.88531299371621 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.6960128883412 + "value": 99.91894651512241 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -36948,7 +36948,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -36960,43 +36960,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4250927104.0 + "value": 591396864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 71303168.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2722103296.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 289406976.0 + "value": 146800640.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 420413440.0 + "value": 228327424.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.460604963076063 + "value": 43.238934598148695 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -37008,13 +37008,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.730261604143386 + "value": 75.88130537107199 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.168979661616476 + "value": 19.489046203703058 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -37026,7 +37026,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 32.46730880328442 + "value": 64.54445266367635 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -37045,47 +37045,47 @@ "time" ], "times": { - "compilation": 17742.418, - "data": 64756.71, - "framework": 930839.614, - "kernel_overhead": 378293.681, - "profiling_overhead": 54344.094, - "profiling_runs": 433445.129, + "compilation_time": 24044.82, + "data": 77904.741, + "framework": 232560.98599999998, + "kernel_overhead": 33602.637, + "profiling_overhead": 64140.925, + "profiling_runs": 56912.683, "runtimes": [ - 8217.536 + 2228.256 ], - "search_algorithm": 33.693, - "validation": 18.958 + "search_algorithm": 46.617, + "validation": 26.696 }, - "timestamp": "2026-03-02 14:28:47 UTC" + "timestamp": "2026-03-05 08:59:56 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 16 + "x": 8, + "y": 128, + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 34 + "private_memory_size": 0, + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -37093,61 +37093,61 @@ { "name": "time", "unit": "", - "value": 8102.208 + "value": 1990.304 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.503675328285718 + "value": 32.142936969155116 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 13992.0 + "value": 2712.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2022264.0 + "value": 1838024.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.18675748014839 + "value": 3.1677998854257843 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 17151845.0 + "value": 35187.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138416998.0 + "value": 2099436.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.5394646986922833 + "value": 21.537040691324844 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02946643169617346 + "value": 0.25222391892708773 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -37165,25 +37165,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.63168229980182 + "value": 97.95824959494315 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.08406460865255 + "value": 99.91193593301041 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -37195,7 +37195,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -37207,43 +37207,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3769630720.0 + "value": 564133888.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 69206016.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2453667840.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 144703488.0 + "value": 115343360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 379617280.0 + "value": 192282624.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.069531360436164 + "value": 43.63054587225425 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -37255,13 +37255,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.508683836511022 + "value": 86.16831431582216 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.731407770738604 + "value": 11.233857383947518 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -37273,7 +37273,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.00341064024267 + "value": 61.72406449131474 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -37292,45 +37292,45 @@ "time" ], "times": { - "compilation": 75771.1, - "data": 62824.066, - "framework": 891007.0690000001, - "kernel_overhead": 360350.754, - "profiling_overhead": 53148.894, - "profiling_runs": 414683.355, + "compilation_time": 23822.036, + "data": 77405.131, + "framework": 229063.786, + "kernel_overhead": 32661.987, + "profiling_overhead": 64386.502, + "profiling_runs": 54610.166, "runtimes": [ - 8102.208 + 1990.304 ], - "search_algorithm": 30.52, - "validation": 20.753 + "search_algorithm": 42.243, + "validation": 23.217 }, - "timestamp": "2026-03-02 14:28:48 UTC" + "timestamp": "2026-03-05 08:59:56 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, + "x": 8, + "y": 128, "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 34 + "private_memory_size": 0, + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "16" }, @@ -37340,49 +37340,49 @@ { "name": "time", "unit": "", - "value": 8379.103 + "value": 1860.704 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.450887062097389 + "value": 34.33315999617463 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 14016.0 + "value": 716.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2021500.0 + "value": 1837388.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.42673126957164 + "value": 3.360913602977359 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 18125517.0 + "value": 31476.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138419265.0 + "value": 2099174.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.5488469278593695 + "value": 11.452660450103078 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -37394,7 +37394,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02957978862727899 + "value": 0.13411339130070343 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -37412,25 +37412,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.02710759722736 + "value": 98.25139844852092 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.10890918956747 + "value": 99.90575165873643 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -37442,7 +37442,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -37454,25 +37454,25 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3769630720.0 + "value": 209715200.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 67108864.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2453667840.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 144703488.0 + "value": 558891008.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -37484,13 +37484,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 379617280.0 + "value": 194805760.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.10520755587862 + "value": 37.78870097511392 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -37502,13 +37502,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.582366952905012 + "value": 91.64111202261034 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.77355609635167 + "value": 6.152662550346154 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -37520,7 +37520,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.107634573462356 + "value": 66.50624182160324 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -37539,19 +37539,19 @@ "time" ], "times": { - "compilation": 18614.155, - "data": 64504.736, - "framework": 898370.9469999999, - "kernel_overhead": 362071.492, - "profiling_overhead": 54579.036, - "profiling_runs": 417215.683, + "compilation_time": 25976.204, + "data": 76949.149, + "framework": 234517.464, + "kernel_overhead": 36203.128, + "profiling_overhead": 63837.546, + "profiling_runs": 57527.641, "runtimes": [ - 8379.103 + 1860.704 ], - "search_algorithm": 30.304, - "validation": 20.007 + "search_algorithm": 30.616, + "validation": 24.73 }, - "timestamp": "2026-03-02 14:28:48 UTC" + "timestamp": "2026-03-05 08:59:57 UTC" }, { "compilation_data": { @@ -37559,7 +37559,7 @@ "global_size": { "x": 8, "y": 128, - "z": 16 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -37568,18 +37568,18 @@ "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 34 + "private_memory_size": 0, + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -37587,61 +37587,61 @@ { "name": "time", "unit": "", - "value": 8251.296 + "value": 1801.632 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.051920271911973 + "value": 35.53418433042789 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3104.0 + "value": 356.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1999092.0 + "value": 1836460.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 53.63251171735445 + "value": 3.4790664465648664 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 14939468.0 + "value": 29535.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138423017.0 + "value": 2098979.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.5689191858103726 + "value": 5.931460255308655 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02981739755850645 + "value": 0.06945531724880524 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -37659,25 +37659,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.22183644857643 + "value": 81.69100383089314 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.80911096449381 + "value": 99.90346598311662 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -37689,7 +37689,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -37701,43 +37701,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3769630720.0 + "value": 138412032.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 33554432.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2453667840.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 144703488.0 + "value": 283639808.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 379617280.0 + "value": 165953536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.114373912907315 + "value": 37.64519713262409 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -37749,13 +37749,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.600674574690707 + "value": 94.92129115226228 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.7840284493409 + "value": 3.3718378570933014 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -37767,7 +37767,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.13350637885929 + "value": 58.68388362376331 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -37786,37 +37786,37 @@ "time" ], "times": { - "compilation": 19131.648, - "data": 64510.979, - "framework": 894893.003, - "kernel_overhead": 360205.968, - "profiling_overhead": 54585.181, - "profiling_runs": 415590.875, + "compilation_time": 23962.465, + "data": 78362.365, + "framework": 211194.99, + "kernel_overhead": 23267.102, + "profiling_overhead": 65058.9, + "profiling_runs": 44506.623, "runtimes": [ - 8251.296 + 1801.632 ], - "search_algorithm": 28.181, - "validation": 21.741 + "search_algorithm": 32.276, + "validation": 24.111 }, - "timestamp": "2026-03-02 14:28:49 UTC" + "timestamp": "2026-03-05 08:59:57 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, - "z": 16 + "y": 128, + "z": 128 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 34 + "private_memory_size": 0, + "registers": 22 }, "configuration": { "INNER_UNROLL_FACTOR": "1", @@ -37824,9 +37824,9 @@ "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -37834,61 +37834,61 @@ { "name": "time", "unit": "", - "value": 8137.632 + "value": 6219.232 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.990619072385731 + "value": 9.545415038739156 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5624.0 + "value": 6404.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1999464.0 + "value": 1838656.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 53.92644022258126 + "value": 0.9987229231401331 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 16040746.0 + "value": 109461.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138416525.0 + "value": 2100518.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.5613219244462293 + "value": 26.286276846293894 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.03018060913546997 + "value": 0.30797285788686707 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -37906,25 +37906,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.51499255124493 + "value": 97.96946830588224 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 101.1835186611363 + "value": 99.97092038407884 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -37936,7 +37936,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -37948,7 +37948,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3769630720.0 + "value": 10880024576.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -37960,31 +37960,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2453667840.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 144703488.0 + "value": 2248146944.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 379617280.0 + "value": 816054272.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.996726592689113 + "value": 43.02645845501432 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -37996,13 +37996,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.362304166829073 + "value": 26.287994987655104 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.647675454804814 + "value": 13.400716194878873 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -38014,7 +38014,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.796421233666546 + "value": 79.91682014027153 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -38033,37 +38033,37 @@ "time" ], "times": { - "compilation": 19115.332, - "data": 64693.932, - "framework": 899171.416, - "kernel_overhead": 362456.244, - "profiling_overhead": 54775.383, - "profiling_runs": 417245.857, + "compilation_time": 26164.951, + "data": 79103.581, + "framework": 2196984.139, + "kernel_overhead": 1003776.618, + "profiling_overhead": 65820.221, + "profiling_runs": 1048283.719, "runtimes": [ - 8137.632 + 6219.232 ], - "search_algorithm": 34.914, - "validation": 18.656 + "search_algorithm": 55.983, + "validation": 29.226 }, - "timestamp": "2026-03-02 14:28:49 UTC" + "timestamp": "2026-03-05 08:59:58 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 16 + "y": 128, + "z": 64 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 64, - "registers": 34 + "private_memory_size": 0, + "registers": 25 }, "configuration": { "INNER_UNROLL_FACTOR": "1", @@ -38071,9 +38071,9 @@ "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -38081,61 +38081,61 @@ { "name": "time", "unit": "", - "value": 8240.127 + "value": 6144.704 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.311145868323815 + "value": 10.058490005591278 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 18120.0 + "value": 920.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2001996.0 + "value": 1841216.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 51.711631834122606 + "value": 1.0147087164549977 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 8383106.0 + "value": 102358.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138421377.0 + "value": 2099490.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.601184825751652 + "value": 13.384364571771066 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029857958924701375 + "value": 0.15681604219944198 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -38153,25 +38153,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.39645880845715 + "value": 98.73096051384066 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.04594490015953 + "value": 99.97077186756191 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -38183,7 +38183,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -38195,7 +38195,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3769630720.0 + "value": 10854858752.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -38207,31 +38207,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2453667840.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 144703488.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 379617280.0 + "value": 837287936.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.20537951789363 + "value": 36.235091304396015 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -38243,13 +38243,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.789267025119948 + "value": 26.771095893400947 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.891907382777353 + "value": 6.875779511684033 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -38261,7 +38261,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.400224806380564 + "value": 83.50313500484299 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -38280,47 +38280,47 @@ "time" ], "times": { - "compilation": 18600.939, - "data": 64595.093, - "framework": 895366.896, - "kernel_overhead": 360830.178, - "profiling_overhead": 54752.841, - "profiling_runs": 415188.784, + "compilation_time": 23539.685, + "data": 78102.244, + "framework": 2775194.272, + "kernel_overhead": 1294130.703, + "profiling_overhead": 64893.993, + "profiling_runs": 1338067.332, "runtimes": [ - 8240.127 + 6144.704 ], - "search_algorithm": 28.138, - "validation": 18.241 + "search_algorithm": 46.736, + "validation": 29.396 }, - "timestamp": "2026-03-02 14:28:50 UTC" + "timestamp": "2026-03-05 08:59:59 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 8 + "x": 8, + "y": 128, + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, - "registers": 32 + "private_memory_size": 32, + "registers": 27 }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -38328,61 +38328,61 @@ { "name": "time", "unit": "", - "value": 9253.984 + "value": 8072.448 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.775040268499445 + "value": 8.04897187303079 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6704.0 + "value": 16484.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2297088.0 + "value": 1912776.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 75.45248369937089 + "value": 48.6625117633078 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 111326794.0 + "value": 609694.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415637.0 + "value": 138419700.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.116670363339105 + "value": 5.08855867674404 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012990189087805623 + "value": 0.059218682067700666 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -38412,13 +38412,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.98073269676199 + "value": 93.33135533592075 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.77652951177926 + "value": 98.09977888294472 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -38430,7 +38430,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -38442,7 +38442,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3528982528.0 + "value": 4250927104.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -38454,31 +38454,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2319450112.0 + "value": 2722103296.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 72351744.0 + "value": 289406976.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 359219200.0 + "value": 420413440.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.828323321331658 + "value": 12.385534521328793 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -38490,13 +38490,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.955619878069218 + "value": 20.604847815094697 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.703311669947807 + "value": 13.089310062225682 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -38508,7 +38508,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.028438400047456 + "value": 32.27081945680216 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -38527,47 +38527,47 @@ "time" ], "times": { - "compilation": 73503.121, - "data": 66346.394, - "framework": 887508.971, - "kernel_overhead": 350859.814, - "profiling_overhead": 55838.638, - "profiling_runs": 414464.125, + "compilation_time": 24440.348, + "data": 78734.352, + "framework": 961842.9010000001, + "kernel_overhead": 380984.278, + "profiling_overhead": 65779.525, + "profiling_runs": 436344.746, "runtimes": [ - 9253.984 + 8072.448 ], - "search_algorithm": 43.092, - "validation": 36.843 + "search_algorithm": 79.203, + "validation": 37.402 }, - "timestamp": "2026-03-02 14:28:50 UTC" + "timestamp": "2026-03-05 09:00:0 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, - "z": 8 + "x": 8, + "y": 128, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, - "registers": 32 + "private_memory_size": 64, + "registers": 34 }, "configuration": { "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -38575,61 +38575,61 @@ { "name": "time", "unit": "", - "value": 9582.336 + "value": 8130.784 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.200406648411306 + "value": 7.900402362411498 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 26620.0 + "value": 2848.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2297844.0 + "value": 1999612.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 77.21577443380141 + "value": 53.602665436616036 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 121642075.0 + "value": 14899102.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138426378.0 + "value": 138419214.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1045934637214303 + "value": 2.594141396113949 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01263563591130812 + "value": 0.029836958069677018 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -38659,13 +38659,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.25250065012403 + "value": 89.24919211840647 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 96.8756928108476 + "value": 98.78342280171512 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -38677,7 +38677,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -38689,7 +38689,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3528982528.0 + "value": 3769630720.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -38701,31 +38701,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2319450112.0 + "value": 2453667840.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 72351744.0 + "value": 144703488.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 359219200.0 + "value": 379617280.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.764100576795764 + "value": 10.123598332366093 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -38737,13 +38737,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.808238988553505 + "value": 20.619549445846758 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.623666260049605 + "value": 11.794825281156971 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -38755,7 +38755,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.83122973332753 + "value": 29.160138261166452 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -38774,19 +38774,19 @@ "time" ], "times": { - "compilation": 21280.612, - "data": 64163.99, - "framework": 887605.735, - "kernel_overhead": 352272.434, - "profiling_overhead": 54421.129, - "profiling_runs": 416748.182, + "compilation_time": 24218.91, + "data": 78312.064, + "framework": 924757.922, + "kernel_overhead": 363142.723, + "profiling_overhead": 65336.315, + "profiling_runs": 417966.82, "runtimes": [ - 9582.336 + 8130.784 ], - "search_algorithm": 32.109, - "validation": 24.724 + "search_algorithm": 48.697, + "validation": 27.214 }, - "timestamp": "2026-03-02 14:28:51 UTC" + "timestamp": "2026-03-05 09:00:1 UTC" }, { "compilation_data": { @@ -38822,49 +38822,49 @@ { "name": "time", "unit": "", - "value": 9353.088 + "value": 9178.752 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.676980173588034 + "value": 7.512745632239801 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6780.0 + "value": 20220.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2201796.0 + "value": 2194648.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 73.2189469175961 + "value": 73.27596037252229 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 100375148.0 + "value": 100265387.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138419868.0 + "value": 138418147.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1345222952271643 + "value": 1.129554155330695 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -38876,7 +38876,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.013213699741943585 + "value": 0.01326300229168878 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -38906,13 +38906,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.51071675154098 + "value": 89.15328889802461 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.50990661598375 + "value": 99.68511493565813 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -38972,7 +38972,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.985234542270512 + "value": 7.91838810241922 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -38984,13 +38984,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 18.314000422985252 + "value": 18.16561995299617 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.896982406317836 + "value": 9.8167968178606 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39002,7 +39002,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.508026974850655 + "value": 24.309410651844388 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -39021,47 +39021,47 @@ "time" ], "times": { - "compilation": 21022.224, - "data": 63041.019, - "framework": 882059.7279999999, - "kernel_overhead": 352114.126, - "profiling_overhead": 53094.882, - "profiling_runs": 413809.701, + "compilation_time": 24709.323, + "data": 79571.877, + "framework": 915947.878, + "kernel_overhead": 354085.079, + "profiling_overhead": 65344.495, + "profiling_runs": 416946.427, "runtimes": [ - 9353.088 + 9178.752 ], - "search_algorithm": 29.869, - "validation": 21.326 + "search_algorithm": 45.035, + "validation": 31.079 }, - "timestamp": "2026-03-02 14:28:51 UTC" + "timestamp": "2026-03-05 09:00:1 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, - "z": 8 + "y": 128, + "z": 64 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, - "registers": 32 + "private_memory_size": 0, + "registers": 26 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -39069,61 +39069,61 @@ { "name": "time", "unit": "", - "value": 9566.048 + "value": 3972.032 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.689490571237353 + "value": 15.787015189095458 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16064.0 + "value": 3524.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2203744.0 + "value": 1837732.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 76.41344404854513 + "value": 1.5694314330587167 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 118661340.0 + "value": 68983.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138417059.0 + "value": 2099750.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.098209084603213 + "value": 20.895878747324076 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012625767564057892 + "value": 0.2448179161767602 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -39141,25 +39141,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.46761822117791 + "value": 98.74736528742896 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.72916732682745 + "value": 99.96466892625487 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -39171,7 +39171,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -39183,43 +39183,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3528982528.0 + "value": 5221908480.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2319450112.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 72351744.0 + "value": 2193620992.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 359219200.0 + "value": 535298048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.686837732407105 + "value": 39.61355797678639 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -39231,13 +39231,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.638931943907977 + "value": 41.79702503190437 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.532171840293042 + "value": 10.734978108780124 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39249,7 +39249,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.604660723387145 + "value": 83.34973387772632 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -39268,47 +39268,47 @@ "time" ], "times": { - "compilation": 21113.852, - "data": 63087.309, - "framework": 885094.192, - "kernel_overhead": 353008.009, - "profiling_overhead": 53220.388, - "profiling_runs": 415778.486, + "compilation_time": 23975.864, + "data": 78273.726, + "framework": 1414019.4139999999, + "kernel_overhead": 619194.085, + "profiling_overhead": 65136.846, + "profiling_runs": 651414.757, "runtimes": [ - 9566.048 + 3972.032 ], - "search_algorithm": 29.353, - "validation": 21.363 + "search_algorithm": 48.854, + "validation": 25.401 }, - "timestamp": "2026-03-02 14:28:52 UTC" + "timestamp": "2026-03-05 09:00:2 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 8 + "y": 128, + "z": 32 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, - "registers": 32 + "private_memory_size": 0, + "registers": 30 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -39316,61 +39316,61 @@ { "name": "time", "unit": "", - "value": 8775.232 + "value": 4215.552 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.682030135315506 + "value": 14.847682196047693 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 24748.0 + "value": 4924.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2205224.0 + "value": 1837728.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 61.384258458295484 + "value": 1.4790217160700008 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 46639972.0 + "value": 74544.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425882.0 + "value": 2100249.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2208052445019124 + "value": 9.81892512849066 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.014113423733518345 + "value": 0.11503605385143993 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -39388,25 +39388,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.99503397902264 + "value": 98.92691866663995 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.61927959264396 + "value": 99.9678956565511 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -39418,7 +39418,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -39430,43 +39430,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3528982528.0 + "value": 5863636992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 1073741824.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 2319450112.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 72351744.0 + "value": 1373634560.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 359219200.0 + "value": 621477888.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.604963210496246 + "value": 43.26739049728806 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -39478,13 +39478,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.739469448289622 + "value": 39.2782497388217 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.66731338471413 + "value": 5.120748379035837 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39496,7 +39496,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.41562088811541 + "value": 90.93688106975301 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -39515,47 +39515,47 @@ "time" ], "times": { - "compilation": 20048.823, - "data": 63328.07, - "framework": 877791.0549999999, - "kernel_overhead": 351603.916, - "profiling_overhead": 53231.508, - "profiling_runs": 409627.561, + "compilation_time": 23870.534, + "data": 77324.653, + "framework": 1917854.088, + "kernel_overhead": 871462.818, + "profiling_overhead": 64274.521, + "profiling_runs": 904792.096, "runtimes": [ - 8775.232 + 4215.552 ], - "search_algorithm": 34.271, - "validation": 20.189 + "search_algorithm": 57.625, + "validation": 27.879 }, - "timestamp": "2026-03-02 14:28:52 UTC" + "timestamp": "2026-03-05 09:00:3 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 64 + "x": 8, + "y": 128, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 26 + "registers": 38 }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -39563,61 +39563,61 @@ { "name": "time", "unit": "", - "value": 3939.648 + "value": 6903.776 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.36165719728759 + "value": 9.184425418034566 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12060.0 + "value": 832.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1873336.0 + "value": 1839768.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5694895026912192 + "value": 0.9348135401804258 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 80616.0 + "value": 115263.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101164.0 + "value": 2103382.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.894371321002005 + "value": 3.0412437887061583 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.24478427763015234 + "value": 0.035632601777576184 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -39647,13 +39647,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.5293467870994 + "value": 98.85410824412926 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96006543118801 + "value": 99.97656318394232 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -39665,7 +39665,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -39677,7 +39677,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5221908480.0 + "value": 8738832384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -39689,31 +39689,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2193620992.0 + "value": 1157627904.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 535298048.0 + "value": 970653696.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.61045113899097 + "value": 36.47847535620343 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -39725,13 +39725,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.79320665241534 + "value": 24.330891866530628 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.733997411704332 + "value": 1.6335437654531062 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39743,7 +39743,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.34220133222632 + "value": 87.97997947019509 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -39762,47 +39762,47 @@ "time" ], "times": { - "compilation": 67937.636, - "data": 61139.095, - "framework": 1378305.634, - "kernel_overhead": 616833.811, - "profiling_overhead": 51257.155, - "profiling_runs": 649075.573, + "compilation_time": 24408.376, + "data": 76201.076, + "framework": 3152492.208, + "kernel_overhead": 1483563.673, + "profiling_overhead": 63213.95, + "profiling_runs": 1529513.509, "runtimes": [ - 3939.648 + 6903.776 ], - "search_algorithm": 26.63, - "validation": 23.172 + "search_algorithm": 50.44, + "validation": 31.183 }, - "timestamp": "2026-03-02 14:28:53 UTC" + "timestamp": "2026-03-05 09:00:5 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, - "z": 64 + "x": 8, + "y": 128, + "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 26 + "private_memory_size": 128, + "registers": 32 }, "configuration": { "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -39810,61 +39810,61 @@ { "name": "time", "unit": "", - "value": 4009.696 + "value": 9419.264 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.274245526872335 + "value": 7.512734828942912 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1076.0 + "value": 14496.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870988.0 + "value": 2199888.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5602848653773602 + "value": 81.7199263060559 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 68623.0 + "value": 133004985.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100075.0 + "value": 138426999.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.895342893181475 + "value": 1.1129817407828222 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.24478554546135017 + "value": 0.01287214272843041 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -39882,25 +39882,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.48662080554776 + "value": 98.86011929677683 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96157359475532 + "value": 98.46560880709742 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -39912,7 +39912,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -39924,7 +39924,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5221908480.0 + "value": 1918369792.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -39936,31 +39936,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 1245708288.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2193620992.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 535298048.0 + "value": 258555904.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.60914710688622 + "value": 7.778050797872386 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -39972,13 +39972,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.79279255990257 + "value": 17.848633397454325 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.7338910578656 + "value": 5.183337262273418 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39990,7 +39990,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.34140359113592 + "value": 17.191973958176984 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40009,19 +40009,19 @@ "time" ], "times": { - "compilation": 17136.205, - "data": 66282.431, - "framework": 1391796.605, - "kernel_overhead": 618658.069, - "profiling_overhead": 56395.443, - "profiling_runs": 650460.662, + "compilation_time": 26344.782, + "data": 77531.561, + "framework": 589961.96, + "kernel_overhead": 192458.676, + "profiling_overhead": 64832.627, + "profiling_runs": 255139.096, "runtimes": [ - 4009.696 + 9419.264 ], - "search_algorithm": 26.521, - "validation": 20.832 + "search_algorithm": 33.08, + "validation": 29.682 }, - "timestamp": "2026-03-02 14:28:54 UTC" + "timestamp": "2026-03-05 09:00:5 UTC" }, { "compilation_data": { @@ -40029,7 +40029,7 @@ "global_size": { "x": 8, "y": 128, - "z": 64 + "z": 32 }, "local_memory_size": 0, "local_size": { @@ -40039,17 +40039,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 26 + "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -40057,61 +40057,61 @@ { "name": "time", "unit": "", - "value": 4080.224 + "value": 2839.04 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.029993186199562 + "value": 22.412756416224436 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 11852.0 + "value": 8040.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841252.0 + "value": 1840808.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5653363072438502 + "value": 2.210598843561634 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 78818.0 + "value": 56077.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104938.0 + "value": 2104252.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.894731209843474 + "value": 14.86028684687657 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.24477825561130695 + "value": 0.17407918690513116 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -40141,13 +40141,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.5853839145707 + "value": 98.80352031798236 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95773420503716 + "value": 99.95630799053643 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -40159,7 +40159,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -40171,43 +40171,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5221908480.0 + "value": 3042967552.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2193620992.0 + "value": 1644167168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 535298048.0 + "value": 399179776.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.60979916505507 + "value": 45.947575104895826 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -40219,13 +40219,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.79315316608874 + "value": 59.445001846116995 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.733983674493496 + "value": 7.749909908649041 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -40237,7 +40237,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.34209218104593 + "value": 88.39897776073991 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40256,47 +40256,47 @@ "time" ], "times": { - "compilation": 17107.172, - "data": 65551.825, - "framework": 1389663.93, - "kernel_overhead": 618156.522, - "profiling_overhead": 55822.607, - "profiling_runs": 650132.976, + "compilation_time": 23286.028, + "data": 77528.53, + "framework": 1197747.8220000002, + "kernel_overhead": 514690.22, + "profiling_overhead": 64431.905, + "profiling_runs": 541097.167, "runtimes": [ - 4080.224 + 2839.04 ], - "search_algorithm": 31.347, - "validation": 20.618 + "search_algorithm": 40.351, + "validation": 31.443 }, - "timestamp": "2026-03-02 14:28:55 UTC" + "timestamp": "2026-03-05 09:00:6 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, - "z": 64 + "y": 128, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 2, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 26 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -40304,61 +40304,61 @@ { "name": "time", "unit": "", - "value": 4055.552 + "value": 3433.408 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.009903602295859 + "value": 18.214500887874173 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5352.0 + "value": 824.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837340.0 + "value": 1837296.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5641518669563421 + "value": 1.8054145829631547 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 71455.0 + "value": 58626.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100689.0 + "value": 2099900.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.894594591144457 + "value": 6.037865353269417 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.24476231210152738 + "value": 0.07073559841521865 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -40388,13 +40388,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.397416805066 + "value": 98.70261585520078 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95374370385154 + "value": 99.9681423929816 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -40406,7 +40406,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -40418,43 +40418,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5221908480.0 + "value": 3301965824.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2193620992.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 535298048.0 + "value": 513540096.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.60823188700584 + "value": 42.224851823466636 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -40466,13 +40466,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.79209940926341 + "value": 48.30422375456628 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.733713031871364 + "value": 3.243081428834406 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -40484,7 +40484,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.34002133970858 + "value": 92.41076730026462 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40503,47 +40503,47 @@ "time" ], "times": { - "compilation": 16204.943, - "data": 64730.565, - "framework": 1386203.366, - "kernel_overhead": 617024.109, - "profiling_overhead": 54823.11, - "profiling_runs": 649625.582, + "compilation_time": 24178.548, + "data": 77800.833, + "framework": 1883855.96, + "kernel_overhead": 856205.541, + "profiling_overhead": 64133.812, + "profiling_runs": 885715.774, "runtimes": [ - 4055.552 + 3433.408 ], - "search_algorithm": 23.247, - "validation": 18.273 + "search_algorithm": 37.431, + "validation": 27.653 }, - "timestamp": "2026-03-02 14:28:55 UTC" + "timestamp": "2026-03-05 09:00:7 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 64 + "y": 128, + "z": 8 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 2, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 26 + "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -40551,61 +40551,61 @@ { "name": "time", "unit": "", - "value": 4199.616 + "value": 5580.448 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.645153801862005 + "value": 11.346890462545408 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 484.0 + "value": 16120.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837520.0 + "value": 1839004.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5575190926150668 + "value": 1.143832056107912 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 65667.0 + "value": 109664.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099173.0 + "value": 2106172.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.924264771217338 + "value": 1.863003721371147 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 33554432.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.24514210706204476 + "value": 0.02182763761900974 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -40635,13 +40635,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.09200468178501 + "value": 73.90623529157122 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96158869262082 + "value": 99.97657476866488 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -40653,7 +40653,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 5637144576.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -40665,43 +40665,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5221908480.0 + "value": 4806148096.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1090519040.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 2193620992.0 + "value": 1222115328.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 12582912.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 535298048.0 + "value": 868564992.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.66725057599654 + "value": 42.34648197185445 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -40713,13 +40713,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.85366281099409 + "value": 29.80898405262761 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.749524725870552 + "value": 1.0588884715960247 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -40731,7 +40731,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.46282150289375 + "value": 96.45216991700806 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40750,47 +40750,47 @@ "time" ], "times": { - "compilation": 16186.575, - "data": 65553.243, - "framework": 1389465.913, - "kernel_overhead": 617980.502, - "profiling_overhead": 55676.053, - "profiling_runs": 650256.115, + "compilation_time": 24822.534, + "data": 78017.84, + "framework": 3358895.26, + "kernel_overhead": 1587971.169, + "profiling_overhead": 65032.283, + "profiling_runs": 1627873.968, "runtimes": [ - 4199.616 + 5580.448 ], - "search_algorithm": 24.335, - "validation": 19.022 + "search_algorithm": 71.487, + "validation": 28.682 }, - "timestamp": "2026-03-02 14:28:56 UTC" + "timestamp": "2026-03-05 09:00:8 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 32 + "x": 8, + "y": 128, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 30 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -40798,61 +40798,61 @@ { "name": "time", "unit": "", - "value": 4191.167 + "value": 3341.024 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.622128975265017 + "value": 18.906208881578948 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12044.0 + "value": 1368.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1874036.0 + "value": 1837828.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.480133420005051 + "value": 1.858903657239731 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 86959.0 + "value": 56261.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102320.0 + "value": 2099284.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.818284256403933 + "value": 6.227236841665717 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11502842432121015 + "value": 0.07295546581779942 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -40882,13 +40882,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.9881837599281 + "value": 98.71694853486564 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96947760708863 + "value": 99.96730626043154 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -40900,7 +40900,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -40912,43 +40912,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5863636992.0 + "value": 2563768320.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1373634560.0 + "value": 1362100224.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 621477888.0 + "value": 484179968.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.263543915002835 + "value": 42.77234941685757 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -40960,13 +40960,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.27502317853031 + "value": 49.8205528667569 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.120327728841597 + "value": 3.344885751552282 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -40978,7 +40978,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.92948541967701 + "value": 89.86253127202497 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40997,47 +40997,47 @@ "time" ], "times": { - "compilation": 59256.458, - "data": 63798.898, - "framework": 1889481.542, - "kernel_overhead": 869355.885, - "profiling_overhead": 54027.056, - "profiling_runs": 902299.703, + "compilation_time": 26133.776, + "data": 78921.26, + "framework": 1756314.173, + "kernel_overhead": 791363.282, + "profiling_overhead": 65892.935, + "profiling_runs": 820136.696, "runtimes": [ - 4191.167 + 3341.024 ], - "search_algorithm": 30.036, - "validation": 16.935 + "search_algorithm": 35.939, + "validation": 26.307 }, - "timestamp": "2026-03-02 14:28:57 UTC" + "timestamp": "2026-03-05 09:00:9 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, - "z": 32 + "x": 8, + "y": 128, + "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 2, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 30 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -41045,61 +41045,61 @@ { "name": "time", "unit": "", - "value": 4325.248 + "value": 6329.568 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.424446392359016 + "value": 10.156435205198644 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 13876.0 + "value": 13184.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1875424.0 + "value": 1851336.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.4852132960123188 + "value": 1.0421575256306586 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 86693.0 + "value": 118511.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106049.0 + "value": 2144717.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.81811205748006 + "value": 1.6607503194129056 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11501900322105627 + "value": 0.019450315839699004 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -41129,13 +41129,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.80566035001763 + "value": 82.03408434095341 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95910205334381 + "value": 99.93390822091152 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -41147,7 +41147,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -41159,43 +41159,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5863636992.0 + "value": 7219970048.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1373634560.0 + "value": 551026688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 621477888.0 + "value": 952385536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.2645440877433 + "value": 43.24724323448953 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -41207,13 +41207,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.27588279571508 + "value": 26.57372760914939 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.120439798074183 + "value": 0.9439642009597744 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -41225,7 +41225,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.93150369662779 + "value": 94.28155122418902 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -41244,19 +41244,19 @@ "time" ], "times": { - "compilation": 20099.09, - "data": 65448.24, - "framework": 1888469.4810000001, - "kernel_overhead": 867420.456, - "profiling_overhead": 54804.516, - "profiling_runs": 900796.269, + "compilation_time": 24166.253, + "data": 78628.2, + "framework": 3716924.085, + "kernel_overhead": 1764615.651, + "profiling_overhead": 65431.499, + "profiling_runs": 1808248.735, "runtimes": [ - 4325.248 + 6329.568 ], - "search_algorithm": 34.629, - "validation": 21.03 + "search_algorithm": 35.864, + "validation": 29.999 }, - "timestamp": "2026-03-02 14:28:58 UTC" + "timestamp": "2026-03-05 09:00:11 UTC" }, { "compilation_data": { @@ -41264,7 +41264,7 @@ "global_size": { "x": 8, "y": 128, - "z": 32 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -41274,17 +41274,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 30 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "2", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -41292,61 +41292,61 @@ { "name": "time", "unit": "", - "value": 4395.488 + "value": 5938.336 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.300228871317394 + "value": 10.62469801900467 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 13456.0 + "value": 16792.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841416.0 + "value": 1840888.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.474778771877703 + "value": 1.0741959221541613 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 85713.0 + "value": 118000.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105119.0 + "value": 2107124.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.818107460994959 + "value": 1.74226574401011 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11502664226906152 + "value": 0.020412961113973564 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -41376,13 +41376,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.9198127511204 + "value": 82.02649049128317 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96708520190391 + "value": 99.97233741013348 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -41394,7 +41394,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -41406,43 +41406,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5863636992.0 + "value": 5944901632.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 101187584.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1373634560.0 + "value": 1054343168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 621477888.0 + "value": 913571840.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.264464575275596 + "value": 44.06317023909161 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -41454,13 +41454,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.275354631518155 + "value": 27.87820807530725 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.12037094073015 + "value": 0.9903025573625988 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -41472,7 +41472,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.93025455497249 + "value": 94.98937109747226 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -41491,19 +41491,19 @@ "time" ], "times": { - "compilation": 18875.479, - "data": 65213.848, - "framework": 1892616.432, - "kernel_overhead": 869480.629, - "profiling_overhead": 54984.999, - "profiling_runs": 902936.956, + "compilation_time": 24181.83, + "data": 79218.936, + "framework": 3631007.884, + "kernel_overhead": 1722633.797, + "profiling_overhead": 64593.292, + "profiling_runs": 1764561.859, "runtimes": [ - 4395.488 + 5938.336 ], - "search_algorithm": 38.983, - "validation": 22.538 + "search_algorithm": 45.797, + "validation": 34.889 }, - "timestamp": "2026-03-02 14:28:59 UTC" + "timestamp": "2026-03-05 09:00:13 UTC" }, { "compilation_data": { @@ -41511,7 +41511,7 @@ "global_size": { "x": 8, "y": 64, - "z": 32 + "z": 256 }, "local_memory_size": 0, "local_size": { @@ -41521,17 +41521,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 30 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "1" }, "correctness": 1, "invalidity": "correct", @@ -41539,61 +41539,61 @@ { "name": "time", "unit": "", - "value": 4527.808 + "value": 6877.376 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.268009055982436 + "value": 9.237515992956766 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 780.0 + "value": 21168.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838736.0 + "value": 1845632.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.4714708020314364 + "value": 0.9344360220264809 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 71335.0 + "value": 139642.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099900.0 + "value": 2109478.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.818250988273213 + "value": 48.014922984451935 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 134217728.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11502039206340572 + "value": 0.5624578441982503 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -41623,13 +41623,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.8290860239727 + "value": 79.48798521642885 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95954486141625 + "value": 99.93532409227332 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -41641,7 +41641,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -41653,43 +41653,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5863636992.0 + "value": 2952790016.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 4311744512.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1373634560.0 + "value": 318767104.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 50331648.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 621477888.0 + "value": 527958016.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.265512485384086 + "value": 21.38788905568714 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -41701,13 +41701,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.27618305859592 + "value": 24.013732451901003 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.120478943674371 + "value": 24.388947021461956 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -41719,7 +41719,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.93219886509544 + "value": 47.23052863273042 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -41738,47 +41738,47 @@ "time" ], "times": { - "compilation": 16962.238, - "data": 66009.275, - "framework": 1889435.213, - "kernel_overhead": 867033.188, - "profiling_overhead": 55988.89, - "profiling_runs": 900403.86, + "compilation_time": 26406.627, + "data": 82304.778, + "framework": 552693.309, + "kernel_overhead": 177725.813, + "profiling_overhead": 67007.363, + "profiling_runs": 225655.355, "runtimes": [ - 4527.808 + 6877.376 ], - "search_algorithm": 25.943, - "validation": 18.136 + "search_algorithm": 34.745, + "validation": 29.34 }, - "timestamp": "2026-03-02 14:29:0 UTC" + "timestamp": "2026-03-05 09:00:14 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 32 + "y": 64, + "z": 128 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 30 + "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -41786,61 +41786,61 @@ { "name": "time", "unit": "", - "value": 4415.648 + "value": 3620.736 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.294321443696766 + "value": 17.24837126215924 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5780.0 + "value": 1256.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837540.0 + "value": 1837172.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.4724987501401523 + "value": 1.7112299626130505 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 74154.0 + "value": 61564.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100396.0 + "value": 2099489.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.820197007110334 + "value": 45.73549124998317 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11505090122269672 + "value": 0.5357720378925297 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -41870,13 +41870,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.78063824314737 + "value": 92.90251745678604 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96686818090743 + "value": 99.94671025782395 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -41888,7 +41888,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -41900,43 +41900,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5863636992.0 + "value": 1509949440.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 142606336.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1373634560.0 + "value": 184549376.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 621477888.0 + "value": 333971456.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.27317453608271 + "value": 28.991484890286433 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -41948,13 +41948,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.28372302939407 + "value": 45.743590541624236 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.121461937914168 + "value": 23.31851002219517 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -41966,7 +41966,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.94965071863393 + "value": 56.912038713924275 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -41985,47 +41985,47 @@ "time" ], "times": { - "compilation": 16717.378, - "data": 63308.564, - "framework": 1883345.6460000002, - "kernel_overhead": 866639.302, - "profiling_overhead": 53577.464, - "profiling_runs": 899820.316, + "compilation_time": 24055.848, + "data": 77380.205, + "framework": 357689.67, + "kernel_overhead": 92963.249, + "profiling_overhead": 64314.252, + "profiling_runs": 123031.964, "runtimes": [ - 4415.648 + 3620.736 ], - "search_algorithm": 26.015, - "validation": 18.09 + "search_algorithm": 29.801, + "validation": 22.124 }, - "timestamp": "2026-03-02 14:29:1 UTC" + "timestamp": "2026-03-05 09:00:14 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 64, - "z": 16 + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -42033,61 +42033,61 @@ { "name": "time", "unit": "", - "value": 6785.248 + "value": 2226.88 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.228294259313869 + "value": 28.558367973079967 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 11472.0 + "value": 7820.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870984.0 + "value": 1838924.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9370103991671824 + "value": 2.800476384481305 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 122680.0 + "value": 45002.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101479.0 + "value": 2100456.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.041119157464842 + "value": 37.93013035287782 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.03563094655575491 + "value": 0.4441285923594015 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -42117,13 +42117,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.85671656581624 + "value": 95.16744930230006 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97623552438426 + "value": 99.89070583528695 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -42135,7 +42135,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -42147,43 +42147,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8738832384.0 + "value": 591396864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 71303168.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1157627904.0 + "value": 146800640.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 970653696.0 + "value": 228327424.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.4768872473672 + "value": 43.23908417441648 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -42195,13 +42195,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.329841374616166 + "value": 75.88087980309557 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6334732368211535 + "value": 19.488936902552865 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -42213,7 +42213,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.97622704770431 + "value": 64.54425208590936 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -42232,47 +42232,47 @@ "time" ], "times": { - "compilation": 52167.45, - "data": 62867.205, - "framework": 3118279.801, - "kernel_overhead": 1478474.159, - "profiling_overhead": 53071.528, - "profiling_runs": 1523866.909, + "compilation_time": 24803.522, + "data": 78563.459, + "framework": 234278.985, + "kernel_overhead": 34198.898, + "profiling_overhead": 64103.015, + "profiling_runs": 57413.613, "runtimes": [ - 6785.248 + 2226.88 ], - "search_algorithm": 30.774, - "validation": 18.848 + "search_algorithm": 34.441, + "validation": 27.724 }, - "timestamp": "2026-03-02 14:29:3 UTC" + "timestamp": "2026-03-05 09:00:14 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, - "z": 16 + "x": 8, + "y": 64, + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -42280,61 +42280,61 @@ { "name": "time", "unit": "", - "value": 6931.264 + "value": 1976.032 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.224742528320665 + "value": 32.36140608144552 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 18784.0 + "value": 4828.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1876748.0 + "value": 1838312.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9423400671137603 + "value": 3.1681256238279145 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 131203.0 + "value": 37705.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2111310.0 + "value": 2100074.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.0410468905774724 + "value": 21.537668578997533 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.03563072442470502 + "value": 0.2521396370978498 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -42364,13 +42364,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.81698101403725 + "value": 97.82733943125086 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97762332948358 + "value": 99.87658484987145 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -42382,7 +42382,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -42394,43 +42394,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8738832384.0 + "value": 564133888.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 69206016.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1157627904.0 + "value": 115343360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 970653696.0 + "value": 192282624.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.476185316791046 + "value": 43.63132467567954 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -42442,13 +42442,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.3293519728617 + "value": 86.17000964283255 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6334403790373457 + "value": 11.234078405584128 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -42460,7 +42460,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.97447478231129 + "value": 61.72546089922437 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -42479,45 +42479,45 @@ "time" ], "times": { - "compilation": 15950.954, - "data": 64770.071, - "framework": 3128184.708, - "kernel_overhead": 1481349.587, - "profiling_overhead": 55066.712, - "profiling_runs": 1526998.338, + "compilation_time": 24564.823, + "data": 77751.773, + "framework": 229319.14800000002, + "kernel_overhead": 32557.192, + "profiling_overhead": 64597.861, + "profiling_runs": 54412.322, "runtimes": [ - 6931.264 + 1976.032 ], - "search_algorithm": 25.4, - "validation": 16.999 + "search_algorithm": 36.376, + "validation": 22.448 }, - "timestamp": "2026-03-02 14:29:5 UTC" + "timestamp": "2026-03-05 09:00:14 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, + "y": 64, "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "16" }, @@ -42527,49 +42527,49 @@ { "name": "time", "unit": "", - "value": 6905.696 + "value": 1850.272 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.056790028439909 + "value": 34.422002032520325 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10000.0 + "value": 3596.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838468.0 + "value": 1839000.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9312986338546073 + "value": 3.3648208058219975 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 121871.0 + "value": 33710.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101548.0 + "value": 2100262.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.041127018280761 + "value": 11.45206350257764 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -42581,7 +42581,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.035632302131764534 + "value": 0.13413207118780235 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -42611,13 +42611,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.84921316233583 + "value": 98.09655018486 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98004037950605 + "value": 99.91773419898439 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -42629,7 +42629,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -42641,13 +42641,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8738832384.0 + "value": 209715200.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 67108864.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -42659,7 +42659,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1157627904.0 + "value": 558891008.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -42671,13 +42671,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 970653696.0 + "value": 194805760.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.47685696535977 + "value": 37.78974534334782 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -42689,13 +42689,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.329841065894787 + "value": 91.6428846840322 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6334732160940102 + "value": 6.1527815644797 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -42707,7 +42707,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.97622665645966 + "value": 66.50772527192586 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -42726,19 +42726,19 @@ "time" ], "times": { - "compilation": 15735.258, - "data": 64898.087, - "framework": 3123900.325, - "kernel_overhead": 1479218.67, - "profiling_overhead": 55010.031, - "profiling_runs": 1524773.537, + "compilation_time": 24453.211, + "data": 76754.081, + "framework": 234888.024, + "kernel_overhead": 36524.862, + "profiling_overhead": 63785.327, + "profiling_runs": 57823.754, "runtimes": [ - 6905.696 + 1850.272 ], - "search_algorithm": 25.161, - "validation": 16.746 + "search_algorithm": 32.345, + "validation": 24.391 }, - "timestamp": "2026-03-02 14:29:6 UTC" + "timestamp": "2026-03-05 09:00:14 UTC" }, { "compilation_data": { @@ -42746,7 +42746,7 @@ "global_size": { "x": 8, "y": 64, - "z": 16 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -42756,17 +42756,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -42774,61 +42774,61 @@ { "name": "time", "unit": "", - "value": 6891.904 + "value": 1790.912 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.05380661603722 + "value": 35.56012603377408 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6516.0 + "value": 5356.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841008.0 + "value": 1832892.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9372223724361698 + "value": 3.4877061363819974 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 122562.0 + "value": 33832.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108991.0 + "value": 2100204.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.041029516167503 + "value": 5.9310913698298 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.035632153343992626 + "value": 0.0694503004200251 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -42858,13 +42858,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.82300965108269 + "value": 81.68637721918192 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97952430698318 + "value": 99.91366329535798 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -42876,7 +42876,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -42888,43 +42888,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8738832384.0 + "value": 138412032.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 33554432.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1157627904.0 + "value": 283639808.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 970653696.0 + "value": 165953536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.47702753000923 + "value": 37.639812750371014 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -42936,13 +42936,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.329865057979646 + "value": 94.90474780527818 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6334748268907233 + "value": 3.3712501966962827 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -42954,7 +42954,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.97633008835561 + "value": 58.67380086401127 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -42973,47 +42973,47 @@ "time" ], "times": { - "compilation": 16297.62, - "data": 62623.752, - "framework": 3122332.74, - "kernel_overhead": 1480593.667, - "profiling_overhead": 52806.692, - "profiling_runs": 1526308.629, + "compilation_time": 23650.668, + "data": 76770.581, + "framework": 209719.948, + "kernel_overhead": 24091.563, + "profiling_overhead": 64030.772, + "profiling_runs": 44827.032, "runtimes": [ - 6891.904 + 1790.912 ], - "search_algorithm": 27.91, - "validation": 20.141 + "search_algorithm": 41.216, + "validation": 25.538 }, - "timestamp": "2026-03-02 14:29:8 UTC" + "timestamp": "2026-03-05 09:00:15 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 16 + "y": 64, + "z": 128 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -43021,61 +43021,61 @@ { "name": "time", "unit": "", - "value": 6913.024 + "value": 6261.504 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.036414948780798 + "value": 9.469040018429538 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6956.0 + "value": 856.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837652.0 + "value": 1840592.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9283849954041656 + "value": 0.9973188251144917 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 117318.0 + "value": 104590.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100910.0 + "value": 2099910.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.0388759222956625 + "value": 26.286308249835255 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.035603729792029366 + "value": 0.30797119837713616 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -43105,13 +43105,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.56273970341462 + "value": 97.6692458008658 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96075373672561 + "value": 99.9713426916427 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -43123,7 +43123,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -43135,43 +43135,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 8738832384.0 + "value": 10880024576.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1157627904.0 + "value": 2248146944.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 970653696.0 + "value": 816054272.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.45457990223357 + "value": 43.02597248224945 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -43183,13 +43183,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.315022275682924 + "value": 26.287742287549797 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6324783022003915 + "value": 13.400587377051751 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -43201,7 +43201,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.92265524617609 + "value": 79.91610803289404 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -43220,47 +43220,47 @@ "time" ], "times": { - "compilation": 14963.764, - "data": 61066.177, - "framework": 3115363.378, - "kernel_overhead": 1478756.242, - "profiling_overhead": 51340.994, - "profiling_runs": 1524199.965, + "compilation_time": 23945.467, + "data": 76991.868, + "framework": 2201109.058, + "kernel_overhead": 1007613.369, + "profiling_overhead": 63755.712, + "profiling_runs": 1052748.109, "runtimes": [ - 6913.024 + 6261.504 ], - "search_algorithm": 37.827, - "validation": 19.253 + "search_algorithm": 44.488, + "validation": 33.36 }, - "timestamp": "2026-03-02 14:29:9 UTC" + "timestamp": "2026-03-05 09:00:16 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 64, - "z": 8 + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, - "registers": 32 + "private_memory_size": 0, + "registers": 25 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -43268,61 +43268,61 @@ { "name": "time", "unit": "", - "value": 9363.008 + "value": 6164.608 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.3220848855615985 + "value": 10.095334291699471 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9792.0 + "value": 476.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2279648.0 + "value": 1837760.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.4611827008901 + "value": 1.0202530959895233 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 133101144.0 + "value": 101974.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138417978.0 + "value": 2099167.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1175336782359646 + "value": 13.384535362937921 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0130641842051674 + "value": 0.1568081026707688 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -43340,25 +43340,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.86691024830382 + "value": 98.66617749294197 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 101.3181415536679 + "value": 99.96635931109842 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -43370,7 +43370,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -43382,43 +43382,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1918369792.0 + "value": 10854858752.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1245708288.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 72351744.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 258555904.0 + "value": 837287936.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.67238810827138 + "value": 36.235008574614 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -43430,13 +43430,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.60490855299957 + "value": 26.770922111768247 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.112558282176023 + "value": 6.875734878315478 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -43448,7 +43448,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.957266099828598 + "value": 83.50264999864991 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -43467,47 +43467,47 @@ "time" ], "times": { - "compilation": 67032.547, - "data": 66437.023, - "framework": 566409.247, - "kernel_overhead": 189763.668, - "profiling_overhead": 56350.57, - "profiling_runs": 253857.986, + "compilation_time": 24097.649, + "data": 77982.026, + "framework": 2787446.492, + "kernel_overhead": 1300638.781, + "profiling_overhead": 64865.803, + "profiling_runs": 1343959.882, "runtimes": [ - 9363.008 + 6164.608 ], - "search_algorithm": 28.102, - "validation": 23.69 + "search_algorithm": 42.463, + "validation": 37.421 }, - "timestamp": "2026-03-02 14:29:10 UTC" + "timestamp": "2026-03-05 09:00:17 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, - "z": 8 + "x": 8, + "y": 64, + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, - "registers": 32 + "private_memory_size": 32, + "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -43515,61 +43515,61 @@ { "name": "time", "unit": "", - "value": 9490.112 + "value": 8107.936 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.19711906912455 + "value": 7.700819651129423 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 30032.0 + "value": 10680.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2288456.0 + "value": 1912864.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.83341451784501 + "value": 48.79969844476198 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 133091404.0 + "value": 670579.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138421294.0 + "value": 138422165.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0975096503264532 + "value": 5.1698815472034445 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012772913971513908 + "value": 0.05988375446286385 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -43599,13 +43599,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.70413151566078 + "value": 97.81570087253475 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.34960656763944 + "value": 98.83095791212034 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -43617,7 +43617,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -43629,43 +43629,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1918369792.0 + "value": 4250927104.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1245708288.0 + "value": 2722103296.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 72351744.0 + "value": 289406976.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 258555904.0 + "value": 420413440.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.72772124193562 + "value": 12.431993883060178 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -43677,13 +43677,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.731931847752954 + "value": 20.68210402402409 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.149446516821811 + "value": 13.138387370730145 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -43695,7 +43695,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.07963513952265 + "value": 32.39187738019218 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -43714,47 +43714,47 @@ "time" ], "times": { - "compilation": 18275.921, - "data": 65035.623, - "framework": 560221.8, - "kernel_overhead": 189245.162, - "profiling_overhead": 55073.237, - "profiling_runs": 250867.778, + "compilation_time": 25281.454, + "data": 77183.531, + "framework": 961418.2250000001, + "kernel_overhead": 381804.215, + "profiling_overhead": 64191.091, + "profiling_runs": 438239.388, "runtimes": [ - 9490.112 + 8107.936 ], - "search_algorithm": 27.549, - "validation": 18.525 + "search_algorithm": 63.456, + "validation": 37.843 }, - "timestamp": "2026-03-02 14:29:10 UTC" + "timestamp": "2026-03-05 09:00:18 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, - "z": 8 + "y": 64, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, - "registers": 32 + "private_memory_size": 64, + "registers": 34 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -43762,61 +43762,61 @@ { "name": "time", "unit": "", - "value": 9606.88 + "value": 8116.128 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 6.103116125903225 + "value": 7.781059053002408 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 22996.0 + "value": 6292.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2199796.0 + "value": 1995340.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.76483500401989 + "value": 53.93306690761545 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 133028226.0 + "value": 15951348.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138422079.0 + "value": 138415389.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1027481499887914 + "value": 2.555559302367888 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012813657757462234 + "value": 0.029830086487344667 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -43846,13 +43846,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.87424825361612 + "value": 90.59221740931332 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.67191979449011 + "value": 99.28389464211543 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -43864,7 +43864,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -43876,43 +43876,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1918369792.0 + "value": 3769630720.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1245708288.0 + "value": 2453667840.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 72351744.0 + "value": 144703488.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 258555904.0 + "value": 379617280.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.805661107827541 + "value": 10.070175708306863 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -43924,13 +43924,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.91191787260781 + "value": 20.51088525696867 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.201715407584714 + "value": 11.732667030536522 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -43942,7 +43942,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.252980480585464 + "value": 29.006529622944804 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -43961,19 +43961,19 @@ "time" ], "times": { - "compilation": 18799.33, - "data": 64353.537, - "framework": 560843.809, - "kernel_overhead": 190037.718, - "profiling_overhead": 54610.828, - "profiling_runs": 251841.726, + "compilation_time": 25812.059, + "data": 80030.98, + "framework": 935020.524, + "kernel_overhead": 366410.92, + "profiling_overhead": 66170.934, + "profiling_runs": 422407.69, "runtimes": [ - 9606.88 + 8116.128 ], - "search_algorithm": 30.038, - "validation": 26.663 + "search_algorithm": 42.311, + "validation": 33.443 }, - "timestamp": "2026-03-02 14:29:11 UTC" + "timestamp": "2026-03-05 09:00:18 UTC" }, { "compilation_data": { @@ -43994,7 +43994,7 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", @@ -44009,49 +44009,49 @@ { "name": "time", "unit": "", - "value": 9614.176 + "value": 9520.576 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.991099422655921 + "value": 7.069279270043059 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 20016.0 + "value": 6964.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2200828.0 + "value": 2199556.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.8205070250143 + "value": 76.44124660472498 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 132985805.0 + "value": 119427196.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425471.0 + "value": 138416300.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1122671887745297 + "value": 1.1118285546576046 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -44063,7 +44063,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01284624245839947 + "value": 0.012771289874294049 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -44093,13 +44093,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.7300263386235 + "value": 93.95876433558671 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.12346426349517 + "value": 98.61868153477063 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -44123,19 +44123,19 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1918369792.0 + "value": 3528982528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 536870912.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1245708288.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", @@ -44153,13 +44153,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 258555904.0 + "value": 359219200.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.789651769092458 + "value": 7.706456951465252 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -44171,13 +44171,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.874830620977086 + "value": 17.681302876563237 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.1909450741338485 + "value": 9.555069315740413 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -44189,7 +44189,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.217277158988388 + "value": 23.661349242514078 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -44208,37 +44208,37 @@ "time" ], "times": { - "compilation": 19970.347, - "data": 64151.336, - "framework": 560612.9750000001, - "kernel_overhead": 189712.119, - "profiling_overhead": 54009.674, - "profiling_runs": 252739.846, + "compilation_time": 24948.574, + "data": 78371.968, + "framework": 917842.0789999999, + "kernel_overhead": 354783.479, + "profiling_overhead": 65368.56, + "profiling_runs": 419318.072, "runtimes": [ - 9614.176 + 9520.576 ], - "search_algorithm": 41.885, - "validation": 24.625 + "search_algorithm": 91.964, + "validation": 46.615 }, - "timestamp": "2026-03-02 14:29:11 UTC" + "timestamp": "2026-03-05 09:00:19 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 8 + "y": 64, + "z": 64 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 128, - "registers": 32 + "private_memory_size": 0, + "registers": 26 }, "configuration": { "INNER_UNROLL_FACTOR": "2", @@ -44246,9 +44246,9 @@ "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -44256,61 +44256,61 @@ { "name": "time", "unit": "", - "value": 9339.136 + "value": 3949.888 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.835555241531504 + "value": 15.892384105960264 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 11580.0 + "value": 5336.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2195272.0 + "value": 1837672.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 79.16668081653239 + "value": 1.5696086950736285 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 118937418.0 + "value": 70478.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425960.0 + "value": 2099807.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1374183450649098 + "value": 20.89672084077298 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.013202189294296568 + "value": 0.24479353066159498 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -44328,25 +44328,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 136314880.0 + "value": 0.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.47917271430029 + "value": 98.49357781613146 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.35762785602634 + "value": 99.95620307090576 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -44358,7 +44358,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -44370,7 +44370,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 1918369792.0 + "value": 5221908480.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -44382,31 +44382,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 1245708288.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 72351744.0 + "value": 2193620992.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 258555904.0 + "value": 535298048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.9862019547807614 + "value": 39.61265975761161 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -44418,13 +44418,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 18.326376417764706 + "value": 41.79640143988144 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.322076354719512 + "value": 10.7348179479383 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -44436,7 +44436,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.652206133071427 + "value": 83.34858064692617 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -44455,44 +44455,44 @@ "time" ], "times": { - "compilation": 20144.35, - "data": 63905.815, - "framework": 555627.475, - "kernel_overhead": 188262.938, - "profiling_overhead": 53734.288, - "profiling_runs": 249724.434, + "compilation_time": 26559.017, + "data": 78492.706, + "framework": 1424406.063, + "kernel_overhead": 624230.37, + "profiling_overhead": 65013.275, + "profiling_runs": 656669.712, "runtimes": [ - 9339.136 + 3949.888 ], - "search_algorithm": 31.022, - "validation": 22.872 + "search_algorithm": 38.687, + "validation": 31.652 }, - "timestamp": "2026-03-02 14:29:11 UTC" + "timestamp": "2026-03-05 09:00:20 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 64, "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 31 + "registers": 30 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "8" @@ -44503,49 +44503,49 @@ { "name": "time", "unit": "", - "value": 2741.984 + "value": 4253.408 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.763345595376844 + "value": 14.871547616592068 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1084.0 + "value": 6728.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871724.0 + "value": 1839000.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.2278338722106152 + "value": 1.480495000462905 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 50357.0 + "value": 76593.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102907.0 + "value": 2100509.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.858438815052768 + "value": 9.81887559338704 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -44557,7 +44557,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1740279685378733 + "value": 0.11502843508906466 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -44587,13 +44587,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.76818115711123 + "value": 98.82290966280213 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93761382169838 + "value": 99.960623232402 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -44617,13 +44617,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3042967552.0 + "value": 5863636992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -44635,7 +44635,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1644167168.0 + "value": 1373634560.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -44647,13 +44647,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 399179776.0 + "value": 621477888.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.9424643250319 + "value": 43.26809498250721 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -44665,13 +44665,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.43862808275651 + "value": 39.27850578300555 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.749078954148432 + "value": 5.120781759796133 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -44683,7 +44683,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.38960912380051 + "value": 90.93755799865345 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -44702,47 +44702,47 @@ "time" ], "times": { - "compilation": 78706.291, - "data": 61578.182, - "framework": 1158989.48, - "kernel_overhead": 510063.884, - "profiling_overhead": 51632.745, - "profiling_runs": 535714.669, + "compilation_time": 24764.465, + "data": 82478.809, + "framework": 1938023.006, + "kernel_overhead": 876395.265, + "profiling_overhead": 68926.297, + "profiling_runs": 910222.635, "runtimes": [ - 2741.984 + 4253.408 ], - "search_algorithm": 28.087, - "validation": 18.382 + "search_algorithm": 44.293, + "validation": 32.638 }, - "timestamp": "2026-03-02 14:29:12 UTC" + "timestamp": "2026-03-05 09:00:21 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, - "z": 32 + "x": 8, + "y": 64, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 31 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -44750,61 +44750,61 @@ { "name": "time", "unit": "", - "value": 2827.936 + "value": 6876.992 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.471133190118152 + "value": 9.261553131948173 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1684.0 + "value": 12252.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872116.0 + "value": 1843212.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.205505617701552 + "value": 0.9398345660512881 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 51523.0 + "value": 124479.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103037.0 + "value": 2109052.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.858666207925905 + "value": 3.0412234130488933 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.17405486212653737 + "value": 0.03563169768917074 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -44834,13 +44834,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.66905412491526 + "value": 98.80536105062056 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95760351894512 + "value": 99.9736940575387 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -44852,7 +44852,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -44864,43 +44864,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3042967552.0 + "value": 8738832384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1644167168.0 + "value": 1157627904.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 399179776.0 + "value": 970653696.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.941284150174724 + "value": 36.47851636128131 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -44912,13 +44912,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.43592501321279 + "value": 24.33097278084056 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.748726552015533 + "value": 1.6335491979324106 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -44930,7 +44930,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.38563263682863 + "value": 87.98032480703539 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -44949,47 +44949,47 @@ "time" ], "times": { - "compilation": 20721.005, - "data": 63780.955, - "framework": 1164913.355, - "kernel_overhead": 510609.198, - "profiling_overhead": 53831.034, - "profiling_runs": 536692.168, + "compilation_time": 25075.63, + "data": 78790.388, + "framework": 3160905.192, + "kernel_overhead": 1485333.989, + "profiling_overhead": 65323.489, + "profiling_runs": 1531457.326, "runtimes": [ - 2827.936 + 6876.992 ], - "search_algorithm": 28.921, - "validation": 22.986 + "search_algorithm": 78.878, + "validation": 34.902 }, - "timestamp": "2026-03-02 14:29:13 UTC" + "timestamp": "2026-03-05 09:00:22 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, - "z": 32 + "y": 64, + "z": 8 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 4, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 31 + "private_memory_size": 128, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -44997,61 +44997,61 @@ { "name": "time", "unit": "", - "value": 2844.96 + "value": 9444.736 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.089884307327203 + "value": 7.214586908299501 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3652.0 + "value": 6424.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839868.0 + "value": 2200016.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.2109354565496284 + "value": 81.74878171257203 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 54160.0 + "value": 132970566.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103858.0 + "value": 138416562.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.858681736165321 + "value": 1.101854189782572 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.17405179273134047 + "value": 0.01295048572331203 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -45069,25 +45069,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.7630406715831 + "value": 98.7285547840844 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95277156020902 + "value": 100.18918381344841 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -45099,7 +45099,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -45111,43 +45111,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3042967552.0 + "value": 1918369792.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 1245708288.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1644167168.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 399179776.0 + "value": 258555904.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.941960184606806 + "value": 7.691286725023169 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -45159,13 +45159,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.437750107653585 + "value": 17.648342034424214 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.748964491573979 + "value": 5.125171594225489 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -45177,7 +45177,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.38830683162813 + "value": 16.999106303836307 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -45196,19 +45196,19 @@ "time" ], "times": { - "compilation": 18424.607, - "data": 65284.481, - "framework": 1167916.444, - "kernel_overhead": 510575.42, - "profiling_overhead": 55411.733, - "profiling_runs": 536644.81, + "compilation_time": 27960.862, + "data": 78533.738, + "framework": 590339.854, + "kernel_overhead": 191806.95, + "profiling_overhead": 64881.801, + "profiling_runs": 255117.365, "runtimes": [ - 2844.96 + 9444.736 ], - "search_algorithm": 27.312, - "validation": 17.709 + "search_algorithm": 42.46, + "validation": 30.934 }, - "timestamp": "2026-03-02 14:29:13 UTC" + "timestamp": "2026-03-05 09:00:23 UTC" }, { "compilation_data": { @@ -45244,49 +45244,49 @@ { "name": "time", "unit": "", - "value": 2842.304 + "value": 2792.544 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.27052401355835 + "value": 22.569444444444446 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8360.0 + "value": 464.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1843836.0 + "value": 1839816.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.2134882201137924 + "value": 2.2088250096649693 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 56879.0 + "value": 47904.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106558.0 + "value": 2099171.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.858447368478153 + "value": 14.860144694511263 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -45298,7 +45298,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1740560208821898 + "value": 0.1740796061448911 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -45328,13 +45328,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.69006744209399 + "value": 98.68993908053892 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95549800589139 + "value": 99.9563749554725 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -45394,7 +45394,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.94172142556084 + "value": 45.947488891749785 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -45406,13 +45406,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.437572699555176 + "value": 59.44510518439561 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.748941362686149 + "value": 7.74992338097345 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -45424,7 +45424,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.38808286873345 + "value": 88.39925566576926 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -45443,37 +45443,37 @@ "time" ], "times": { - "compilation": 17862.991, - "data": 63531.883, - "framework": 1162913.62, - "kernel_overhead": 509749.555, - "profiling_overhead": 53770.908, - "profiling_runs": 535861.274, + "compilation_time": 24808.625, + "data": 77775.321, + "framework": 1193808.3090000001, + "kernel_overhead": 513088.504, + "profiling_overhead": 64032.088, + "profiling_runs": 538912.396, "runtimes": [ - 2842.304 + 2792.544 ], - "search_algorithm": 39.781, - "validation": 22.311 + "search_algorithm": 43.425, + "validation": 25.346 }, - "timestamp": "2026-03-02 14:29:14 UTC" + "timestamp": "2026-03-05 09:00:23 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 32, - "z": 32 + "y": 64, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 8, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 31 + "registers": 38 }, "configuration": { "INNER_UNROLL_FACTOR": "4", @@ -45481,9 +45481,9 @@ "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -45491,61 +45491,61 @@ { "name": "time", "unit": "", - "value": 2942.912 + "value": 3438.496 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.022522389818526 + "value": 18.340705859969557 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8640.0 + "value": 6936.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840360.0 + "value": 1843920.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.203660020617124 + "value": 1.8369101984189582 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 56306.0 + "value": 64363.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105523.0 + "value": 2131844.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.865823032102757 + "value": 6.037968743338642 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 16777216.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.17413233547342652 + "value": 0.07073497949503008 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -45575,13 +45575,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.55847586238731 + "value": 98.6244823157165 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95189125706314 + "value": 99.96464312366544 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -45593,7 +45593,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4966055936.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -45605,7 +45605,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3042967552.0 + "value": 3301965824.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -45617,31 +45617,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 553648128.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1644167168.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 6291456.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 399179776.0 + "value": 513540096.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.96352502240998 + "value": 42.22617457955385 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -45653,13 +45653,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.46577874689567 + "value": 48.30549198166996 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.752618615928293 + "value": 3.2431665759177823 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -45671,7 +45671,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.43002469663749 + "value": 92.41329593708035 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -45690,47 +45690,47 @@ "time" ], "times": { - "compilation": 18294.88, - "data": 63470.487, - "framework": 1163535.364, - "kernel_overhead": 509991.199, - "profiling_overhead": 53797.205, - "profiling_runs": 536276.473, + "compilation_time": 24721.758, + "data": 78262.588, + "framework": 1878639.315, + "kernel_overhead": 853164.604, + "profiling_overhead": 64697.951, + "profiling_runs": 882514.172, "runtimes": [ - 2942.912 + 3438.496 ], - "search_algorithm": 31.649, - "validation": 20.125 + "search_algorithm": 35.283, + "validation": 23.679 }, - "timestamp": "2026-03-02 14:29:15 UTC" + "timestamp": "2026-03-05 09:00:24 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 64, - "z": 16 + "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 56 }, "configuration": { "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -45738,61 +45738,61 @@ { "name": "time", "unit": "", - "value": 3415.52 + "value": 5624.192 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.30445740384802 + "value": 11.307695630999325 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12492.0 + "value": 3148.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1873664.0 + "value": 1839460.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8115898631218814 + "value": 1.137946424476207 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 71450.0 + "value": 98384.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105994.0 + "value": 2104246.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.03729488107607 + "value": 1.8629396254946387 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07072896594631678 + "value": 0.021823630750987578 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -45822,13 +45822,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.70568532851564 + "value": 73.87128442121306 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96625239598114 + "value": 99.96404320180227 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -45840,7 +45840,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -45852,7 +45852,7 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3301965824.0 + "value": 4806148096.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", @@ -45864,31 +45864,31 @@ "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 1222115328.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 513540096.0 + "value": 868564992.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.22169988244044 + "value": 42.344028981832444 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -45900,13 +45900,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.30060771718336 + "value": 29.807248250783537 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.2428386528870665 + "value": 1.0588268116428234 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -45918,7 +45918,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.40394212827809 + "value": 96.44658551797258 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -45937,32 +45937,32 @@ "time" ], "times": { - "compilation": 55945.424, - "data": 61814.739, - "framework": 1835174.9900000002, - "kernel_overhead": 846263.633, - "profiling_overhead": 51917.404, - "profiling_runs": 875179.214, + "compilation_time": 24859.089, + "data": 78048.58, + "framework": 3355439.174, + "kernel_overhead": 1586094.999, + "profiling_overhead": 65231.731, + "profiling_runs": 1626063.864, "runtimes": [ - 3415.52 + 5624.192 ], - "search_algorithm": 27.814, - "validation": 18.762 + "search_algorithm": 60.754, + "validation": 30.371 }, - "timestamp": "2026-03-02 14:29:16 UTC" + "timestamp": "2026-03-05 09:00:26 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 32, + "x": 8, + "y": 64, "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 8, + "x": 32, + "y": 4, "z": 1 }, "max_work_group_size": 1024, @@ -45970,12 +45970,12 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "16" }, @@ -45985,49 +45985,49 @@ { "name": "time", "unit": "", - "value": 3554.432 + "value": 3316.768 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.076518557214884 + "value": 18.95185564093792 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8656.0 + "value": 6832.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871084.0 + "value": 1836756.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.794527600812627 + "value": 1.8673008943705545 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 64088.0 + "value": 61747.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099278.0 + "value": 2102890.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.037124342642198 + "value": 6.227424115098213 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -46039,7 +46039,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07072559929412536 + "value": 0.07295563474519229 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -46069,13 +46069,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.62715405681988 + "value": 98.68698568472067 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96403789183842 + "value": 99.96448362900611 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -46099,13 +46099,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3301965824.0 + "value": 2563768320.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -46117,7 +46117,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 1362100224.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -46129,13 +46129,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 513540096.0 + "value": 484179968.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.220777670583956 + "value": 42.77310402813395 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -46147,13 +46147,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.29937859289387 + "value": 49.82207497903768 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.2427561311147004 + "value": 3.344987944149258 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -46165,7 +46165,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.4016237963141 + "value": 89.86538420599011 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -46184,47 +46184,47 @@ "time" ], "times": { - "compilation": 16784.544, - "data": 65951.887, - "framework": 1844479.3020000001, - "kernel_overhead": 846476.635, - "profiling_overhead": 56227.625, - "profiling_runs": 875823.155, + "compilation_time": 24203.763, + "data": 77936.848, + "framework": 1748959.326, + "kernel_overhead": 788849.022, + "profiling_overhead": 64563.894, + "profiling_runs": 817609.562, "runtimes": [ - 3554.432 + 3316.768 ], - "search_algorithm": 25.443, - "validation": 17.297 + "search_algorithm": 43.562, + "validation": 25.205 }, - "timestamp": "2026-03-02 14:29:16 UTC" + "timestamp": "2026-03-05 09:00:27 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, - "z": 16 + "y": 64, + "z": 8 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 4, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -46232,61 +46232,61 @@ { "name": "time", "unit": "", - "value": 3575.2 + "value": 6288.928 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.79049516861903 + "value": 10.091708547091953 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2196.0 + "value": 12596.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839208.0 + "value": 1840584.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.790323728902174 + "value": 1.0213701488695492 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 59167.0 + "value": 115520.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099244.0 + "value": 2109655.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.037349238153336 + "value": 1.6606604593534566 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07072879495988264 + "value": 0.01945609325954036 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -46316,13 +46316,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.69873117570742 + "value": 81.99438654004841 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96394722997593 + "value": 99.96857820355152 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -46334,7 +46334,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -46346,43 +46346,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3301965824.0 + "value": 7219970048.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 551026688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 513540096.0 + "value": 952385536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.22295182879703 + "value": 43.24515528422781 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -46394,13 +46394,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.30160475909495 + "value": 26.57240218981999 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.2429055929568142 + "value": 0.9439171188034201 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -46412,7 +46412,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.40584957018105 + "value": 94.2768883338397 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -46431,19 +46431,19 @@ "time" ], "times": { - "compilation": 16415.616, - "data": 62488.649, - "framework": 1836528.683, - "kernel_overhead": 846164.012, - "profiling_overhead": 52537.018, - "profiling_runs": 875339.004, + "compilation_time": 24433.959, + "data": 79260.738, + "framework": 3719222.7299999995, + "kernel_overhead": 1765346.545, + "profiling_overhead": 66146.353, + "profiling_runs": 1808469.094, "runtimes": [ - 3575.2 + 6288.928 ], - "search_algorithm": 25.555, - "validation": 19.025 + "search_algorithm": 34.645, + "validation": 28.769 }, - "timestamp": "2026-03-02 14:29:17 UTC" + "timestamp": "2026-03-05 09:00:29 UTC" }, { "compilation_data": { @@ -46451,7 +46451,7 @@ "global_size": { "x": 8, "y": 64, - "z": 16 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -46461,17 +46461,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "16", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "4", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -46479,61 +46479,61 @@ { "name": "time", "unit": "", - "value": 3511.232 + "value": 5936.288 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.619644538606405 + "value": 10.611630281999474 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10296.0 + "value": 16748.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841104.0 + "value": 1838620.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8034813613845255 + "value": 1.0719808512402962 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 68726.0 + "value": 114278.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104955.0 + "value": 2106241.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.037435403295708 + "value": 1.7422212148292622 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07073021172957644 + "value": 0.02041312998834651 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -46563,13 +46563,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.62663703605267 + "value": 81.98400134401336 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96521031819559 + "value": 99.97694531497237 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -46581,7 +46581,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -46593,43 +46593,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3301965824.0 + "value": 5944901632.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 101187584.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 150994944.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 1054343168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 513540096.0 + "value": 913571840.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.22322252217325 + "value": 44.06170121123424 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -46641,13 +46641,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.301961972933185 + "value": 27.877153800759196 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.2429295758194887 + "value": 0.9902651069361483 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -46659,7 +46659,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.40656606508587 + "value": 94.98582051363933 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -46678,19 +46678,19 @@ "time" ], "times": { - "compilation": 15957.24, - "data": 61037.96, - "framework": 1836748.4849999999, - "kernel_overhead": 847637.381, - "profiling_overhead": 51167.062, - "profiling_runs": 876906.082, + "compilation_time": 24722.366, + "data": 77285.507, + "framework": 3615287.965, + "kernel_overhead": 1715812.871, + "profiling_overhead": 64546.034, + "profiling_runs": 1757643.553, "runtimes": [ - 3511.232 + 5936.288 ], - "search_algorithm": 26.16, - "validation": 20.673 + "search_algorithm": 44.18, + "validation": 31.376 }, - "timestamp": "2026-03-02 14:29:18 UTC" + "timestamp": "2026-03-05 09:00:31 UTC" }, { "compilation_data": { @@ -46698,7 +46698,7 @@ "global_size": { "x": 8, "y": 32, - "z": 16 + "z": 256 }, "local_memory_size": 0, "local_size": { @@ -46711,14 +46711,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "1" }, "correctness": 1, "invalidity": "correct", @@ -46726,61 +46726,61 @@ { "name": "time", "unit": "", - "value": 3511.744 + "value": 6865.76 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 13.875535470734002 + "value": 8.862184314713947 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6140.0 + "value": 2868.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842996.0 + "value": 1842304.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.7954308372664665 + "value": 0.9229860526596289 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 63498.0 + "value": 119400.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101561.0 + "value": 2105180.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.017775570686679 + "value": 48.27052748999128 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 134217728.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07044975991297436 + "value": 0.5655699457428771 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -46810,13 +46810,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.88698364238269 + "value": 93.11833134569186 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.87989539259831 + "value": 99.97621901757134 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -46828,7 +46828,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 8589934592.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -46840,43 +46840,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 3301965824.0 + "value": 2952790016.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 4311744512.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1429209088.0 + "value": 318767104.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 50331648.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 513540096.0 + "value": 527958016.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.09159777743576 + "value": 21.49688886078385 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -46888,13 +46888,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.15153497930194 + "value": 24.13672430186117 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.2328301072529384 + "value": 24.51386061907775 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -46906,7 +46906,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.11881657315995 + "value": 47.47248207250062 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -46925,47 +46925,47 @@ "time" ], "times": { - "compilation": 16523.436, - "data": 66063.532, - "framework": 1841855.179, - "kernel_overhead": 845095.378, - "profiling_overhead": 56308.443, - "profiling_runs": 874387.826, + "compilation_time": 24665.438, + "data": 77947.126, + "framework": 542210.712, + "kernel_overhead": 175954.631, + "profiling_overhead": 64666.104, + "profiling_runs": 223642.851, "runtimes": [ - 3511.744 + 6865.76 ], - "search_algorithm": 25.734, - "validation": 19.267 + "search_algorithm": 37.882, + "validation": 25.618 }, - "timestamp": "2026-03-02 14:29:19 UTC" + "timestamp": "2026-03-05 09:00:31 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 8 + "x": 8, + "y": 32, + "z": 128 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 56 + "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -46973,61 +46973,61 @@ { "name": "time", "unit": "", - "value": 5500.928 + "value": 3631.2 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.917535216385462 + "value": 17.310942152530977 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 14148.0 + "value": 6612.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870628.0 + "value": 1838884.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1412000921393666 + "value": 1.715406862425362 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 109163.0 + "value": 65308.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102920.0 + "value": 2101480.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.8628315264466573 + "value": 45.72948291338111 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02182549444389482 + "value": 0.5356574799970314 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -47057,13 +47057,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.90350555293487 + "value": 93.87215583888523 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97289330604909 + "value": 99.93992318613759 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -47075,7 +47075,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 6442450944.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -47087,43 +47087,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4806148096.0 + "value": 1509949440.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 142606336.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1222115328.0 + "value": 184549376.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 868564992.0 + "value": 333971456.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.34384605715055 + "value": 28.98759654304713 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -47135,13 +47135,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.807154814962228 + "value": 45.73691557471624 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0588234925725106 + "value": 23.315107353517458 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -47153,7 +47153,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.4462831899665 + "value": 56.90379400679579 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -47172,47 +47172,47 @@ "time" ], "times": { - "compilation": 81890.705, - "data": 62898.288, - "framework": 3304737.0360000003, - "kernel_overhead": 1574772.148, - "profiling_overhead": 53128.46, - "profiling_runs": 1613938.14, + "compilation_time": 24520.518, + "data": 77963.475, + "framework": 359034.719, + "kernel_overhead": 92904.447, + "profiling_overhead": 64644.011, + "profiling_runs": 123522.786, "runtimes": [ - 5500.928 + 3631.2 ], - "search_algorithm": 25.371, - "validation": 14.5 + "search_algorithm": 36.809, + "validation": 27.443 }, - "timestamp": "2026-03-02 14:29:21 UTC" + "timestamp": "2026-03-05 09:00:32 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 32, - "z": 8 + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 56 + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -47220,61 +47220,61 @@ { "name": "time", "unit": "", - "value": 5645.472 + "value": 2231.424 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.82223856874319 + "value": 28.590428169125815 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16108.0 + "value": 5060.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870208.0 + "value": 1840244.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1364284268810139 + "value": 2.800416474024113 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 108091.0 + "value": 41211.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106242.0 + "value": 2100272.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.862827447639034 + "value": 37.99212538023481 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.021826386865460148 + "value": 0.4449778529547002 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -47304,13 +47304,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.86837724439764 + "value": 95.46414085106828 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97861722805314 + "value": 99.91758031659226 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -47322,7 +47322,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 5368709120.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -47334,43 +47334,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4806148096.0 + "value": 591396864.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 71303168.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1222115328.0 + "value": 146800640.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 868564992.0 + "value": 228327424.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.343078488500936 + "value": 43.31002983312871 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -47382,13 +47382,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.806667025276568 + "value": 76.00553042181487 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0588061650824563 + "value": 19.520951661071592 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -47400,7 +47400,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.4447048607884 + "value": 64.65031582143423 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -47419,47 +47419,47 @@ "time" ], "times": { - "compilation": 18053.297, - "data": 65717.839, - "framework": 3315162.654, - "kernel_overhead": 1577164.395, - "profiling_overhead": 55693.982, - "profiling_runs": 1616586.438, + "compilation_time": 24528.642, + "data": 77323.318, + "framework": 231816.72, + "kernel_overhead": 33615.581, + "profiling_overhead": 63989.701, + "profiling_runs": 56888.12, "runtimes": [ - 5645.472 + 2231.424 ], - "search_algorithm": 26.769, - "validation": 19.347 + "search_algorithm": 36.559, + "validation": 23.36 }, - "timestamp": "2026-03-02 14:29:23 UTC" + "timestamp": "2026-03-05 09:00:32 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, - "z": 8 + "y": 32, + "z": 32 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 56 + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -47467,61 +47467,61 @@ { "name": "time", "unit": "", - "value": 5839.2 + "value": 1936.672 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.630559126729043 + "value": 32.640868168657256 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16260.0 + "value": 6948.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839788.0 + "value": 1840076.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1386888891702602 + "value": 3.2223475633067986 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 109338.0 + "value": 39289.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106083.0 + "value": 2104429.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.8628765599866357 + "value": 21.79161473305648 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.021826663678289473 + "value": 0.2552221573043323 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -47551,13 +47551,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.90123455253439 + "value": 96.73632578735163 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97697302968265 + "value": 99.89381028891323 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -47569,7 +47569,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4831838208.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -47581,43 +47581,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4806148096.0 + "value": 564133888.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 69206016.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1222115328.0 + "value": 115343360.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 868564992.0 + "value": 192282624.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.34436950801246 + "value": 44.157755051299716 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -47629,13 +47629,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.80753524771838 + "value": 87.20843607952823 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0588370064802306 + "value": 11.369459195915057 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -47647,7 +47647,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.44751414694855 + "value": 62.46935035668396 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -47666,47 +47666,47 @@ "time" ], "times": { - "compilation": 16470.423, - "data": 61850.789, - "framework": 3309157.5810000002, - "kernel_overhead": 1577748.124, - "profiling_overhead": 51964.899, - "profiling_runs": 1617593.769, + "compilation_time": 24698.542, + "data": 76695.425, + "framework": 227253.72, + "kernel_overhead": 32495.878, + "profiling_overhead": 63719.927, + "profiling_runs": 54342.49, "runtimes": [ - 5839.2 + 1936.672 ], - "search_algorithm": 27.492, - "validation": 16.258 + "search_algorithm": 43.634, + "validation": 24.552 }, - "timestamp": "2026-03-02 14:29:25 UTC" + "timestamp": "2026-03-05 09:00:32 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, - "z": 8 + "y": 32, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 56 + "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -47714,61 +47714,61 @@ { "name": "time", "unit": "", - "value": 5678.336 + "value": 1827.712 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.591648465768426 + "value": 35.021866332538565 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 11272.0 + "value": 4848.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836628.0 + "value": 1837672.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1321781396723627 + "value": 3.4246966512993438 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 101817.0 + "value": 34480.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101128.0 + "value": 2100020.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.862827853036964 + "value": 11.654054812821633 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02182551692799029 + "value": 0.13647255755436913 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -47798,13 +47798,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.86891850700779 + "value": 98.08988486958107 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97463247680257 + "value": 99.89894430902328 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -47816,7 +47816,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4563402752.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -47828,43 +47828,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4806148096.0 + "value": 209715200.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 67108864.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1222115328.0 + "value": 558891008.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 868564992.0 + "value": 194805760.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.34320769028313 + "value": 38.45647812040454 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -47876,13 +47876,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.806666992179693 + "value": 93.25950999934733 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0588061639067736 + "value": 6.261319641069463 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -47894,7 +47894,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.44470475369766 + "value": 67.68100480283489 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -47913,19 +47913,19 @@ "time" ], "times": { - "compilation": 15530.091, - "data": 63575.907, - "framework": 3313732.618, - "kernel_overhead": 1578314.781, - "profiling_overhead": 53879.474, - "profiling_runs": 1617962.456, + "compilation_time": 24577.089, + "data": 77136.855, + "framework": 234084.409, + "kernel_overhead": 35825.818, + "profiling_overhead": 64064.571, + "profiling_runs": 57057.165, "runtimes": [ - 5678.336 + 1827.712 ], - "search_algorithm": 27.303, - "validation": 17.739 + "search_algorithm": 44.633, + "validation": 24.566 }, - "timestamp": "2026-03-02 14:29:26 UTC" + "timestamp": "2026-03-05 09:00:32 UTC" }, { "compilation_data": { @@ -47943,10 +47943,10 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 56 + "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", + "INNER_UNROLL_FACTOR": "0", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", @@ -47961,49 +47961,49 @@ { "name": "time", "unit": "", - "value": 5639.968 + "value": 1799.904 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.692012854116548 + "value": 35.56145848786036 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16924.0 + "value": 6132.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842260.0 + "value": 1834916.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1372204853789638 + "value": 3.5184998841520843 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 110023.0 + "value": 35259.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106770.0 + "value": 2100200.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.8625157323214108 + "value": 5.961831917408487 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -48015,7 +48015,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02181796410341218 + "value": 0.06977761564981086 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -48045,13 +48045,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.55780491403087 + "value": 81.25260497346576 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94145854759049 + "value": 99.82755053993611 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -48063,7 +48063,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4429185024.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -48075,13 +48075,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 4806148096.0 + "value": 138412032.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 268435456.0 + "value": 33554432.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -48093,7 +48093,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1222115328.0 + "value": 283639808.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -48105,13 +48105,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 868564992.0 + "value": 165953536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.34262199029218 + "value": 37.85417208805649 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -48123,13 +48123,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.806242663222516 + "value": 95.43428046859238 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0587910906979678 + "value": 3.390060500043992 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -48141,7 +48141,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.44329645311322 + "value": 59.00117852168162 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -48160,47 +48160,47 @@ "time" ], "times": { - "compilation": 16080.574, - "data": 60861.705, - "framework": 3303154.818, - "kernel_overhead": 1575884.462, - "profiling_overhead": 51136.997, - "profiling_runs": 1615271.654, + "compilation_time": 24334.354, + "data": 76929.907, + "framework": 209143.915, + "kernel_overhead": 23648.318, + "profiling_overhead": 63803.298, + "profiling_runs": 44762.392, "runtimes": [ - 5639.968 + 1799.904 ], - "search_algorithm": 24.76, - "validation": 17.113 + "search_algorithm": 36.057, + "validation": 22.071 }, - "timestamp": "2026-03-02 14:29:28 UTC" + "timestamp": "2026-03-05 09:00:32 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 16 + "x": 8, + "y": 32, + "z": 128 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "2" }, "correctness": 1, "invalidity": "correct", @@ -48208,61 +48208,61 @@ { "name": "time", "unit": "", - "value": 3303.008 + "value": 6304.704 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.734742007122565 + "value": 9.536935496657243 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 11784.0 + "value": 6852.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1873584.0 + "value": 1840988.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8635238509652818 + "value": 0.9961468944835492 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 71843.0 + "value": 109059.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101397.0 + "value": 2100242.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.226980936471538 + "value": 26.223188148362546 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 67108864.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07294578628558666 + "value": 0.3072354953008165 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -48292,13 +48292,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.73430351740676 + "value": 96.97716782907379 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95998125480881 + "value": 99.96873945483638 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -48310,7 +48310,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 6979321856.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -48322,43 +48322,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2563768320.0 + "value": 10880024576.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 2164260864.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1362100224.0 + "value": 2248146944.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 25165824.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 484179968.0 + "value": 816054272.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.76934537910117 + "value": 42.923889023291586 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -48370,13 +48370,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 49.81759314662223 + "value": 26.225627206373623 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.344687039873319 + "value": 13.368923243874056 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -48388,7 +48388,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.85728925772315 + "value": 79.72730278941437 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -48407,47 +48407,47 @@ "time" ], "times": { - "compilation": 65293.297, - "data": 63496.608, - "framework": 1712998.582, - "kernel_overhead": 783656.865, - "profiling_overhead": 53741.778, - "profiling_runs": 812103.331, + "compilation_time": 24348.374, + "data": 78186.905, + "framework": 2195452.199, + "kernel_overhead": 1003753.289, + "profiling_overhead": 65048.167, + "profiling_runs": 1048463.838, "runtimes": [ - 3303.008 + 6304.704 ], - "search_algorithm": 27.969, - "validation": 15.797 + "search_algorithm": 42.057, + "validation": 28.959 }, - "timestamp": "2026-03-02 14:29:29 UTC" + "timestamp": "2026-03-05 09:00:33 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 32, - "z": 16 + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 38 + "registers": 25 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -48455,37 +48455,37 @@ { "name": "time", "unit": "", - "value": 3407.68 + "value": 6144.608 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.516516001352505 + "value": 10.087598722579013 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 220.0 + "value": 104.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868188.0 + "value": 1837524.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8492495276314964 + "value": 1.018433280485753 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 54216.0 + "value": 99190.0 }, { "name": "lts__t_sectors_op_write.sum", @@ -48497,19 +48497,19 @@ "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.226192736427134 + "value": 13.378509483061752 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07294506746083075 + "value": 0.15673982190871424 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -48539,13 +48539,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.65698348810352 + "value": 98.1517333931006 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96305330552224 + "value": 99.96910015295781 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -48557,7 +48557,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -48569,43 +48569,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2563768320.0 + "value": 10854858752.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1362100224.0 + "value": 1392508928.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 484179968.0 + "value": 837287936.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.76742415948237 + "value": 36.21758260803667 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -48617,13 +48617,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 49.81557126017632 + "value": 26.75853128432482 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3445512931026578 + "value": 6.8725524685326445 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -48635,7 +48635,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.85367647477464 + "value": 83.46404006601857 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -48654,47 +48654,47 @@ "time" ], "times": { - "compilation": 16715.234, - "data": 66066.615, - "framework": 1723266.625, - "kernel_overhead": 786346.833, - "profiling_overhead": 56019.978, - "profiling_runs": 814833.199, + "compilation_time": 24821.827, + "data": 77857.568, + "framework": 2773804.949, + "kernel_overhead": 1294252.32, + "profiling_overhead": 64444.816, + "profiling_runs": 1337250.245, "runtimes": [ - 3407.68 + 6144.608 ], - "search_algorithm": 26.945, - "validation": 18.512 + "search_algorithm": 45.613, + "validation": 29.089 }, - "timestamp": "2026-03-02 14:29:30 UTC" + "timestamp": "2026-03-05 09:00:35 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, - "z": 16 + "y": 32, + "z": 32 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 38 + "private_memory_size": 32, + "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -48702,61 +48702,61 @@ { "name": "time", "unit": "", - "value": 3406.08 + "value": 8064.704 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.280191531255593 + "value": 8.090677234665323 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 404.0 + "value": 19412.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837660.0 + "value": 1912388.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8509482269161568 + "value": 48.85853826682277 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 56337.0 + "value": 628090.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099167.0 + "value": 138421055.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.22672486742929 + "value": 5.107106411004788 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07294146056014625 + "value": 0.059446845362297024 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -48774,25 +48774,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.71937435301028 + "value": 96.65110886677385 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95405450017589 + "value": 98.16438543993496 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -48804,7 +48804,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -48816,43 +48816,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2563768320.0 + "value": 4250927104.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 2722103296.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1362100224.0 + "value": 289406976.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 484179968.0 + "value": 420413440.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.76929635896512 + "value": 12.424864317518265 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -48864,13 +48864,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 49.81759268435235 + "value": 20.67062284628662 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3446870088371328 + "value": 13.13109390772407 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -48882,7 +48882,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.85729065093486 + "value": 32.373903885939896 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -48901,45 +48901,45 @@ "time" ], "times": { - "compilation": 17545.1, - "data": 64000.657, - "framework": 1715930.788, - "kernel_overhead": 784525.639, - "profiling_overhead": 54279.984, - "profiling_runs": 813124.508, + "compilation_time": 24354.097, + "data": 78575.183, + "framework": 957781.711, + "kernel_overhead": 379431.39, + "profiling_overhead": 65399.014, + "profiling_runs": 434376.124, "runtimes": [ - 3406.08 + 8064.704 ], - "search_algorithm": 27.785, - "validation": 16.29 + "search_algorithm": 56.609, + "validation": 34.37 }, - "timestamp": "2026-03-02 14:29:31 UTC" + "timestamp": "2026-03-05 09:00:35 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, + "y": 32, "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 38 + "private_memory_size": 64, + "registers": 34 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "16" }, @@ -48949,49 +48949,49 @@ { "name": "time", "unit": "", - "value": 3386.976 + "value": 8090.496 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.384454159830327 + "value": 8.135632238833727 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1124.0 + "value": 12864.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839276.0 + "value": 2002940.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.854213525729012 + "value": 51.74972333189757 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 58944.0 + "value": 8323941.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103061.0 + "value": 138424331.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.226824935224893 + "value": 2.5947053079077986 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -49003,7 +49003,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07295057159456247 + "value": 0.029970745596199177 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -49021,25 +49021,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.67379208432195 + "value": 85.36379920850604 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97185691856738 + "value": 98.56486595079154 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -49063,25 +49063,25 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2563768320.0 + "value": 3769630720.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 2453667840.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1362100224.0 + "value": 144703488.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -49093,13 +49093,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 484179968.0 + "value": 379617280.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.76728674590101 + "value": 10.191693973159959 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -49111,13 +49111,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 49.81494300186261 + "value": 20.757933160368353 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.344509112673881 + "value": 11.87398373895094 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -49129,7 +49129,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.8525432684732 + "value": 29.355912389466752 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -49148,19 +49148,19 @@ "time" ], "times": { - "compilation": 16873.007, - "data": 62739.409, - "framework": 1721058.886, - "kernel_overhead": 788336.592, - "profiling_overhead": 52925.188, - "profiling_runs": 817057.697, + "compilation_time": 24215.945, + "data": 78247.447, + "framework": 922000.013, + "kernel_overhead": 361865.57, + "profiling_overhead": 64844.229, + "profiling_runs": 417042.767, "runtimes": [ - 3386.976 + 8090.496 ], - "search_algorithm": 25.535, - "validation": 19.704 + "search_algorithm": 42.498, + "validation": 26.252 }, - "timestamp": "2026-03-02 14:29:32 UTC" + "timestamp": "2026-03-05 09:00:36 UTC" }, { "compilation_data": { @@ -49168,7 +49168,7 @@ "global_size": { "x": 8, "y": 32, - "z": 16 + "z": 8 }, "local_memory_size": 0, "local_size": { @@ -49177,18 +49177,18 @@ "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 38 + "private_memory_size": 128, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "1", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "Z_ITERATIONS": "32" }, "correctness": 1, "invalidity": "correct", @@ -49196,61 +49196,61 @@ { "name": "time", "unit": "", - "value": 3360.8 + "value": 8625.12 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.338697775312506 + "value": 8.063457950698561 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 11736.0 + "value": 6728.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841200.0 + "value": 2200652.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8576682001786744 + "value": 61.356680346013235 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 68297.0 + "value": 46739764.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105750.0 + "value": 138415274.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.252194500587304 + "value": 1.2064805030356447 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 8388608.0 + "value": 4194304.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07322342906605447 + "value": 0.014021967429461848 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -49268,25 +49268,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.19986492559298 + "value": 76.05859515468921 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92388034089177 + "value": 99.25660372418342 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -49298,7 +49298,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4630511616.0 + "value": 4462739456.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -49310,43 +49310,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 2563768320.0 + "value": 3528982528.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 1073741824.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 285212672.0 + "value": 2319450112.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1362100224.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 3145728.0 + "value": 1572864.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 484179968.0 + "value": 359219200.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.94796885258666 + "value": 8.4053607003467 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -49358,13 +49358,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.025273309938 + "value": 19.28804614709385 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3586304102131224 + "value": 10.423361852195372 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -49376,7 +49376,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.23195558042815 + "value": 25.811520214807686 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -49395,47 +49395,47 @@ "time" ], "times": { - "compilation": 16737.263, - "data": 65351.0, - "framework": 1720440.997, - "kernel_overhead": 785432.471, - "profiling_overhead": 55603.067, - "profiling_runs": 814054.459, + "compilation_time": 24041.102, + "data": 78706.309, + "framework": 908163.1880000001, + "kernel_overhead": 353274.014, + "profiling_overhead": 65015.233, + "profiling_runs": 411167.632, "runtimes": [ - 3360.8 + 8625.12 ], - "search_algorithm": 25.11, - "validation": 17.083 + "search_algorithm": 37.178, + "validation": 38.88 }, - "timestamp": "2026-03-02 14:29:33 UTC" + "timestamp": "2026-03-05 09:00:37 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 8 + "x": 8, + "y": 32, + "z": 64 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 26 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "4" }, "correctness": 1, "invalidity": "correct", @@ -49443,61 +49443,61 @@ { "name": "time", "unit": "", - "value": 6287.136 + "value": 3950.784 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.928222619343274 + "value": 15.90295084136471 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10304.0 + "value": 3592.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869352.0 + "value": 1842392.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.018673155172287 + "value": 1.5775104373684279 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 111886.0 + "value": 71749.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102951.0 + "value": 2104966.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6606701247938778 + "value": 20.926013506303686 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 33554432.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019456948461380998 + "value": 0.24516418972791373 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -49527,13 +49527,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.02892972522397 + "value": 98.1708214591034 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9762894171048 + "value": 99.96507347393995 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -49545,7 +49545,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 5637144576.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -49557,43 +49557,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 7219970048.0 + "value": 5221908480.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 1090519040.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 551026688.0 + "value": 2193620992.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 12582912.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 952385536.0 + "value": 535298048.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.24349202071664 + "value": 39.66936716748703 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -49605,13 +49605,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.57152056168148 + "value": 41.85597388452374 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9438858012023086 + "value": 10.750118292607173 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -49623,7 +49623,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.27376038338296 + "value": 83.46743014893643 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -49642,47 +49642,47 @@ "time" ], "times": { - "compilation": 84302.687, - "data": 62413.083, - "framework": 3663554.362, - "kernel_overhead": 1752994.527, - "profiling_overhead": 52490.516, - "profiling_runs": 1795656.236, + "compilation_time": 24116.146, + "data": 79245.053, + "framework": 1415123.358, + "kernel_overhead": 618644.604, + "profiling_overhead": 66439.329, + "profiling_runs": 650794.372, "runtimes": [ - 6287.136 + 3950.784 ], - "search_algorithm": 25.718, - "validation": 15.775 + "search_algorithm": 41.877, + "validation": 28.555 }, - "timestamp": "2026-03-02 14:29:35 UTC" + "timestamp": "2026-03-05 09:00:37 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 32, - "z": 8 + "z": 32 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 30 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -49690,61 +49690,61 @@ { "name": "time", "unit": "", - "value": 6329.44 + "value": 4236.48 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.835863956069759 + "value": 14.908583555638275 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9752.0 + "value": 5364.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869732.0 + "value": 1840228.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0129348129641307 + "value": 1.4869796502321442 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 110865.0 + "value": 78356.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102825.0 + "value": 2108049.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6606315360307207 + "value": 9.821006571665619 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01945654488053938 + "value": 0.11505348118941593 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -49774,13 +49774,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.99355804313643 + "value": 98.7831965909778 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97774028286662 + "value": 99.96000825308357 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -49792,7 +49792,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -49804,43 +49804,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 7219970048.0 + "value": 5863636992.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 551026688.0 + "value": 1373634560.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 952385536.0 + "value": 621477888.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.24199642488347 + "value": 43.277425103423695 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -49852,13 +49852,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.57058381369405 + "value": 39.287299923543024 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9438525256085166 + "value": 5.1219282615165955 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -49870,7 +49870,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.27043687183976 + "value": 90.95793194170592 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -49889,47 +49889,47 @@ "time" ], "times": { - "compilation": 15576.604, - "data": 60773.065, - "framework": 3668831.347, - "kernel_overhead": 1757044.977, - "profiling_overhead": 51062.306, - "profiling_runs": 1799950.999, + "compilation_time": 24685.986, + "data": 78041.327, + "framework": 1918435.955, + "kernel_overhead": 871168.803, + "profiling_overhead": 64847.741, + "profiling_runs": 904378.084, "runtimes": [ - 6329.44 + 4236.48 ], - "search_algorithm": 26.668, - "validation": 16.22 + "search_algorithm": 44.42, + "validation": 28.369 }, - "timestamp": "2026-03-02 14:29:36 UTC" + "timestamp": "2026-03-05 09:00:38 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, - "z": 8 + "y": 32, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -49937,61 +49937,61 @@ { "name": "time", "unit": "", - "value": 6530.24 + "value": 6838.592 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.707821922194354 + "value": 9.285256901595575 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9844.0 + "value": 19008.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838948.0 + "value": 1841704.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0130702548518211 + "value": 0.9416504668621262 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 111867.0 + "value": 130057.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102817.0 + "value": 2106727.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6606978132871584 + "value": 3.039212143170935 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019456446298450676 + "value": 0.035603269706009945 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -50021,13 +50021,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.03837862664565 + "value": 98.56857093128562 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97036882031011 + "value": 99.95587674007793 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -50039,7 +50039,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -50051,43 +50051,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 7219970048.0 + "value": 8738832384.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 551026688.0 + "value": 1157627904.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 952385536.0 + "value": 970653696.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.24505204352288 + "value": 36.45602488721565 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -50099,13 +50099,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.572408397564867 + "value": 24.31589441793252 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9439173393177949 + "value": 1.6325368566727154 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -50117,7 +50117,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.27691035845254 + "value": 87.92580889586094 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -50136,45 +50136,45 @@ "time" ], "times": { - "compilation": 15645.104, - "data": 61147.31, - "framework": 3665365.4560000002, - "kernel_overhead": 1754918.443, - "profiling_overhead": 51425.715, - "profiling_runs": 1797873.988, + "compilation_time": 24861.159, + "data": 79806.908, + "framework": 3152635.467, + "kernel_overhead": 1480914.161, + "profiling_overhead": 64942.192, + "profiling_runs": 1526972.206, "runtimes": [ - 6530.24 + 6838.592 ], - "search_algorithm": 24.347, - "validation": 18.061 + "search_algorithm": 45.49, + "validation": 29.497 }, - "timestamp": "2026-03-02 14:29:38 UTC" + "timestamp": "2026-03-05 09:00:40 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, + "y": 32, "z": 8 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, - "private_memory_size": 0, - "registers": 48 + "private_memory_size": 128, + "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "2", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "32" }, @@ -50184,49 +50184,49 @@ { "name": "time", "unit": "", - "value": 6327.2 + "value": 9165.92 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.700568860997267 + "value": 8.145448480634432 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9084.0 + "value": 22636.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837988.0 + "value": 2196596.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0123538243271357 + "value": 79.04288350906616 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 110173.0 + "value": 118168995.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102954.0 + "value": 138425334.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6606006742009345 + "value": 1.118574600383318 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -50238,7 +50238,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019456268853208437 + "value": 0.013013605868966799 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -50256,25 +50256,25 @@ "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum", "type": "Double", "unit": "", - "value": 0.0 + "value": 136314880.0 }, { "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.99679827504048 + "value": 94.04285060999976 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97544770239483 + "value": 97.27988108658779 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -50298,25 +50298,25 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 7219970048.0 + "value": 1918369792.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 536870912.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 1245708288.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 551026688.0 + "value": 72351744.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -50328,13 +50328,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 952385536.0 + "value": 258555904.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.242519753109185 + "value": 7.959519426128018 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -50346,13 +50346,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.5708161534387 + "value": 18.264732318029502 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9438607788880202 + "value": 5.3041745830801 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -50364,7 +50364,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.2712611958134 + "value": 17.592829727686745 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -50383,19 +50383,19 @@ "time" ], "times": { - "compilation": 15147.041, - "data": 67363.488, - "framework": 3676673.6229999997, - "kernel_overhead": 1754545.057, - "profiling_overhead": 57488.886, - "profiling_runs": 1797276.192, + "compilation_time": 24135.997, + "data": 77924.161, + "framework": 586277.313, + "kernel_overhead": 191176.399, + "profiling_overhead": 64912.126, + "profiling_runs": 252264.627, "runtimes": [ - 6327.2 + 9165.92 ], - "search_algorithm": 31.288, - "validation": 17.75 + "search_algorithm": 48.631, + "validation": 27.454 }, - "timestamp": "2026-03-02 14:29:40 UTC" + "timestamp": "2026-03-05 09:00:40 UTC" }, { "compilation_data": { @@ -50403,7 +50403,7 @@ "global_size": { "x": 8, "y": 32, - "z": 8 + "z": 32 }, "local_memory_size": 0, "local_size": { @@ -50413,17 +50413,17 @@ }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "8" }, "correctness": 1, "invalidity": "correct", @@ -50431,61 +50431,61 @@ { "name": "time", "unit": "", - "value": 6288.416 + "value": 2791.648 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.76157417314915 + "value": 22.490874843847593 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 17936.0 + "value": 6028.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839020.0 + "value": 1837576.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0213104399416653 + "value": 2.217034693898057 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 120962.0 + "value": 50824.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106542.0 + "value": 2102377.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6630453379794148 + "value": 14.86756301873132 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 16777216.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01948480266261858 + "value": 0.17416611135391405 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -50515,13 +50515,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.88903025974514 + "value": 98.55989876475215 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9626868328243 + "value": 99.95610568054649 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -50533,7 +50533,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4966055936.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -50545,43 +50545,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 7219970048.0 + "value": 3042967552.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 134217728.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 553648128.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 551026688.0 + "value": 1644167168.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 6291456.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 952385536.0 + "value": 399179776.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.31132524956411 + "value": 45.97136285728396 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -50593,13 +50593,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.613180789333935 + "value": 59.47480540321403 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.94536567501174 + "value": 7.753795430985422 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -50611,7 +50611,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.42157964092183 + "value": 88.44344800426047 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -50630,47 +50630,47 @@ "time" ], "times": { - "compilation": 18601.258, - "data": 61619.465, - "framework": 3665823.42, - "kernel_overhead": 1754853.565, - "profiling_overhead": 51706.383, - "profiling_runs": 1797644.007, + "compilation_time": 24198.094, + "data": 78770.391, + "framework": 1196677.217, + "kernel_overhead": 512842.886, + "profiling_overhead": 65554.928, + "profiling_runs": 539509.012, "runtimes": [ - 6288.416 + 2791.648 ], - "search_algorithm": 39.993, - "validation": 19.886 + "search_algorithm": 45.382, + "validation": 30.198 }, - "timestamp": "2026-03-02 14:29:42 UTC" + "timestamp": "2026-03-05 09:00:41 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, - "y": 64, - "z": 8 + "x": 8, + "y": 32, + "z": 16 }, "local_memory_size": 0, "local_size": { - "x": 16, - "y": 4, + "x": 32, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_X": "32", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -50678,61 +50678,61 @@ { "name": "time", "unit": "", - "value": 5861.824 + "value": 3516.288 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.3040225197671 + "value": 18.178393894070034 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8888.0 + "value": 10672.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1867752.0 + "value": 1840604.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0644309409637727 + "value": 1.8007282731522503 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 105127.0 + "value": 67680.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101158.0 + "value": 2104923.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7422209760148286 + "value": 6.018119050830337 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.020411815690473685 + "value": 0.07050194654910528 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -50762,13 +50762,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.02231449338082 + "value": 97.87885903881602 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97475547349117 + "value": 99.94660232742486 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -50780,7 +50780,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -50792,43 +50792,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5944901632.0 + "value": 3301965824.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 101187584.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1054343168.0 + "value": 1429209088.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 913571840.0 + "value": 513540096.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.05970765715054 + "value": 42.09399134711855 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -50840,13 +50840,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.875969512573267 + "value": 48.15504251612038 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9902230381053247 + "value": 3.233065598616481 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -50858,7 +50858,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.98178497720443 + "value": 92.12552685022465 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -50877,44 +50877,44 @@ "time" ], "times": { - "compilation": 82562.464, - "data": 62606.884, - "framework": 3577884.034, - "kernel_overhead": 1710718.066, - "profiling_overhead": 52459.798, - "profiling_runs": 1752099.286, + "compilation_time": 24698.605, + "data": 80136.006, + "framework": 1879396.657, + "kernel_overhead": 851819.686, + "profiling_overhead": 65983.091, + "profiling_runs": 881457.874, "runtimes": [ - 5861.824 + 3516.288 ], - "search_algorithm": 31.997, - "validation": 21.409 + "search_algorithm": 35.249, + "validation": 28.315 }, - "timestamp": "2026-03-02 14:29:44 UTC" + "timestamp": "2026-03-05 09:00:42 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { - "x": 16, + "x": 8, "y": 32, "z": 8 }, "local_memory_size": 0, "local_size": { - "x": 16, + "x": 32, "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", + "INNER_UNROLL_FACTOR": "4", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", + "WORK_GROUP_SIZE_X": "32", "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "32" @@ -50925,49 +50925,49 @@ { "name": "time", "unit": "", - "value": 5997.312 + "value": 5563.296 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.21437019225386 + "value": 11.221908793564845 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9120.0 + "value": 460.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868940.0 + "value": 1835236.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0598538800501756 + "value": 1.1300387376468086 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 105534.0 + "value": 90916.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103000.0 + "value": 2099287.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7421734460089777 + "value": 1.8625685915885937 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -50979,7 +50979,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.020411718040420582 + "value": 0.02181860098945793 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -51009,13 +51009,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.98496725575745 + "value": 65.51030609794789 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97184252665176 + "value": 99.93690497125672 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -51039,13 +51039,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5944901632.0 + "value": 4806148096.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 101187584.0 + "value": 268435456.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -51057,7 +51057,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1054343168.0 + "value": 1222115328.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -51069,13 +51069,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 913571840.0 + "value": 868564992.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.06070527278263 + "value": 42.34571709523101 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -51087,13 +51087,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.87664839102866 + "value": 29.80847088087678 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9902471535387376 + "value": 1.0588702424725516 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -51105,7 +51105,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.98409843116656 + "value": 96.4505062399428 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -51124,47 +51124,47 @@ "time" ], "times": { - "compilation": 18420.375, - "data": 66845.677, - "framework": 3585667.8719999995, - "kernel_overhead": 1710441.407, - "profiling_overhead": 56500.854, - "profiling_runs": 1751879.934, + "compilation_time": 23581.835, + "data": 75326.499, + "framework": 3354481.438, + "kernel_overhead": 1588474.409, + "profiling_overhead": 62603.051, + "profiling_runs": 1628077.479, "runtimes": [ - 5997.312 + 5563.296 ], - "search_algorithm": 36.69, - "validation": 20.282 + "search_algorithm": 45.147, + "validation": 32.326 }, - "timestamp": "2026-03-02 14:29:46 UTC" + "timestamp": "2026-03-05 09:00:44 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 128, - "z": 8 + "y": 32, + "z": 16 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 2, + "y": 8, "z": 1 }, "max_work_group_size": 1024, "private_memory_size": 0, - "registers": 48 + "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "Z_ITERATIONS": "16" }, "correctness": 1, "invalidity": "correct", @@ -51172,61 +51172,61 @@ { "name": "time", "unit": "", - "value": 5997.44 + "value": 3350.72 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.045999333758832 + "value": 18.988455957664733 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2104.0 + "value": 9892.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837420.0 + "value": 1841048.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0554914148191614 + "value": 1.8782185453861036 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 99579.0 + "value": 64533.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099815.0 + "value": 2104827.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7422248260009499 + "value": 6.251894138926755 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", "type": "Double", "unit": "", - "value": 4194304.0 + "value": 8388608.0 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.020412306667398905 + "value": 0.07321920996067269 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -51256,13 +51256,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.03084386557352 + "value": 98.28363392705725 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97377177233805 + "value": 99.90941576963502 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -51274,7 +51274,7 @@ "name": "smsp__sass_thread_inst_executed_op_fp32_pred_on.sum", "type": "Double", "unit": "", - "value": 4462739456.0 + "value": 4630511616.0 }, { "name": "smsp__sass_thread_inst_executed_op_fp64_pred_on.sum", @@ -51286,43 +51286,43 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5944901632.0 + "value": 2563768320.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 101187584.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", "type": "Double", "unit": "", - "value": 150994944.0 + "value": 285212672.0 }, { "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1054343168.0 + "value": 1362100224.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", "type": "Double", "unit": "", - "value": 1572864.0 + "value": 3145728.0 }, { "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 913571840.0 + "value": 484179968.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.06146674128025 + "value": 42.951574176414084 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -51334,13 +51334,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.876914323775935 + "value": 50.029632957788465 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9902566001243649 + "value": 3.358923111179645 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -51352,7 +51352,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.98500392066505 + "value": 90.23981919665971 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -51371,32 +51371,32 @@ "time" ], "times": { - "compilation": 16285.268, - "data": 61057.751, - "framework": 3575419.1559999995, - "kernel_overhead": 1711148.248, - "profiling_overhead": 51100.351, - "profiling_runs": 1752112.806, + "compilation_time": 24972.932, + "data": 77391.656, + "framework": 1747351.6519999998, + "kernel_overhead": 788741.945, + "profiling_overhead": 63335.835, + "profiling_runs": 817882.216, "runtimes": [ - 5997.44 + 3350.72 ], - "search_algorithm": 28.463, - "validation": 16.524 + "search_algorithm": 54.468, + "validation": 26.877 }, - "timestamp": "2026-03-02 14:29:48 UTC" + "timestamp": "2026-03-05 09:00:45 UTC" }, { "compilation_data": { "constant_memory_size": 0, "global_size": { "x": 8, - "y": 64, + "y": 32, "z": 8 }, "local_memory_size": 0, "local_size": { "x": 32, - "y": 4, + "y": 8, "z": 1 }, "max_work_group_size": 1024, @@ -51404,12 +51404,12 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", + "INNER_UNROLL_FACTOR": "8", "USE_CONSTANT_MEMORY": "0", "USE_SOA": "1", "VECTOR_SIZE": "1", "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", + "WORK_GROUP_SIZE_Y": "8", "WORK_GROUP_SIZE_Z": "1", "Z_ITERATIONS": "32" }, @@ -51419,49 +51419,49 @@ { "name": "time", "unit": "", - "value": 6017.824 + "value": 6236.736 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.079542089855753 + "value": 10.005895814408497 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10800.0 + "value": 644.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840364.0 + "value": 1832244.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.066640577830882 + "value": 1.0167806807657565 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 110568.0 + "value": 103356.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109045.0 + "value": 2102967.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7421664050152015 + "value": 1.6630653180086694 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -51473,7 +51473,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.020411509180943345 + "value": 0.019484653742100056 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -51503,13 +51503,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.98873966620913 + "value": 81.88728529120492 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97384059039892 + "value": 99.95878281391381 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -51533,13 +51533,13 @@ "name": "smsp__sass_thread_inst_executed_op_integer_pred_on.sum", "type": "Double", "unit": "", - "value": 5944901632.0 + "value": 7219970048.0 }, { "name": "smsp__sass_thread_inst_executed_op_control_pred_on.sum", "type": "Double", "unit": "", - "value": 101187584.0 + "value": 134217728.0 }, { "name": "smsp__sass_thread_inst_executed_op_memory_pred_on.sum", @@ -51551,7 +51551,7 @@ "name": "smsp__sass_thread_inst_executed_op_misc_pred_on.sum", "type": "Double", "unit": "", - "value": 1054343168.0 + "value": 551026688.0 }, { "name": "smsp__sass_thread_inst_executed_op_conversion_pred_on.sum", @@ -51563,13 +51563,13 @@ "name": "smsp__inst_executed.sum", "type": "Double", "unit": "", - "value": 913571840.0 + "value": 952385536.0 }, { "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.05954477633211 + "value": 43.31271424638746 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -51581,13 +51581,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.87580601465629 + "value": 26.614016791373185 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9902172302569557 + "value": 0.9453953718615231 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -51599,7 +51599,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.98122872155051 + "value": 94.42454571377702 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -51618,19 +51618,19 @@ "time" ], "times": { - "compilation": 16074.796, - "data": 66391.894, - "framework": 3583095.781, - "kernel_overhead": 1709411.805, - "profiling_overhead": 56473.797, - "profiling_runs": 1750818.285, + "compilation_time": 24979.427, + "data": 78284.69, + "framework": 3708605.554, + "kernel_overhead": 1761394.898, + "profiling_overhead": 64543.004, + "profiling_runs": 1804382.962, "runtimes": [ - 6017.824 + 6236.736 ], - "search_algorithm": 24.867, - "validation": 19.972 + "search_algorithm": 50.199, + "validation": 29.194 }, - "timestamp": "2026-03-02 14:29:50 UTC" + "timestamp": "2026-03-05 09:00:47 UTC" }, { "compilation_data": { @@ -51666,49 +51666,49 @@ { "name": "time", "unit": "", - "value": 6069.92 + "value": 5932.8 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.074889278104397 + "value": 10.62023681160696 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 17748.0 + "value": 16728.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1835244.0 + "value": 1836556.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0699909902100238 + "value": 1.0755796159854507 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 116713.0 + "value": 116918.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106524.0 + "value": 2109085.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7438483358716461 + "value": 1.7439376209537754 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -51720,7 +51720,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.020430260487816362 + "value": 0.02043118649027698 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -51750,13 +51750,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.85695580834701 + "value": 81.85709614989365 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96423935453412 + "value": 99.96637055468311 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -51816,7 +51816,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.10406079865785 + "value": 44.10535746174518 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -51828,13 +51828,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.904094336944908 + "value": 27.90476417213291 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9912221010804405 + "value": 0.9912458952747409 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -51846,7 +51846,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.07762955026169 + "value": 95.07991291944529 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -51865,19 +51865,19 @@ "time" ], "times": { - "compilation": 15980.912, - "data": 60555.548, - "framework": 3575935.317, - "kernel_overhead": 1711449.768, - "profiling_overhead": 50852.782, - "profiling_runs": 1753077.219, + "compilation_time": 24780.61, + "data": 77758.398, + "framework": 3620788.844, + "kernel_overhead": 1718387.76, + "profiling_overhead": 64627.364, + "profiling_runs": 1760015.322, "runtimes": [ - 6069.92 + 5932.8 ], - "search_algorithm": 13.173, - "validation": 21.007 + "search_algorithm": 25.304, + "validation": 27.046 }, - "timestamp": "2026-03-02 14:29:51 UTC" + "timestamp": "2026-03-05 09:00:49 UTC" } ], "schema_version": "1.0.0" diff --git a/Source/Output/JsonT4Converters.cpp b/Source/Output/JsonT4Converters.cpp index 253afaf3..d98f5a5b 100644 --- a/Source/Output/JsonT4Converters.cpp +++ b/Source/Output/JsonT4Converters.cpp @@ -97,7 +97,7 @@ void to_json(json& j, const as_T4& result) to_json(j_configuration,as_T4(configuration)); j["configuration"] = j_configuration; j["times"] = json::object(); - j["times"]["compilation"] = time.ConvertFromNanosecondsDouble(result.v.GetCompilationOverhead()); + j["times"]["compilation_time"] = time.ConvertFromNanosecondsDouble(result.v.GetCompilationOverhead()); j["times"]["data"] = time.ConvertFromNanosecondsDouble(result.v.GetDataMovementOverhead()); j["times"]["profiling_runs"] = time.ConvertFromNanosecondsDouble(result.v.GetProfilingRunsOverhead()); j["times"]["profiling_overhead"] = time.ConvertFromNanosecondsDouble(result.v.GetProfilingOverhead()); @@ -169,7 +169,7 @@ void from_json(const json& j, as_T4& result) const Nanoseconds durationNs = time.ConvertToNanosecondsDouble(duration); double compilationOverhead; - j.at("times").at("compilation").get_to(compilationOverhead); + j.at("times").at("compilation_time").get_to(compilationOverhead); const Nanoseconds compilationOverheadNs = time.ConvertToNanosecondsDouble(compilationOverhead); double dataMovementOverhead; diff --git a/Tutorials/03KernelTuning/FullSearchSpace.t4.json b/Tutorials/03KernelTuning/FullSearchSpace.t4.json index 14f100a6..32d2c2b9 100644 --- a/Tutorials/03KernelTuning/FullSearchSpace.t4.json +++ b/Tutorials/03KernelTuning/FullSearchSpace.t4.json @@ -5,7 +5,7 @@ "compute_api": "CUDA", "device": "NVIDIA RTX 500 Ada Generation Laptop GPU", "platform": "NVIDIA CUDA", - "timestamp": "2026-03-04 11:13:39 UTC", + "timestamp": "2026-03-05 09:04:43 UTC", "timeunit": "microseconds" }, "results": [ @@ -36,49 +36,49 @@ { "name": "time", "unit": "", - "value": 94.048 + "value": 90.656 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.827121284755513 + "value": 26.710382938978828 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2020.0 + "value": 4828.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 50920.0 + "value": 50080.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.872445901375592 + "value": 19.857135397000107 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 263537.0 + "value": 263456.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 131582.0 + "value": 131870.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.441561913636126 + "value": 6.277755426557081 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -90,7 +90,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.4759668916411122 + "value": 1.4745269705958586 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -120,13 +120,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.283510567368428 + "value": 24.890646652745048 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 41.14330905218855 + "value": 40.744663569536044 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -186,7 +186,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.070893672901722 + "value": 8.142985215480907 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -204,7 +204,7 @@ "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.174760249686823 + "value": 7.237890027386714 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -216,7 +216,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.513517951817022 + "value": 13.632338798988025 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -235,19 +235,19 @@ "time" ], "times": { - "compilation": 42500.777, - "data": 7388.856, - "framework": 44869.263, - "kernel_overhead": 9641.577, - "profiling_overhead": 4680.642, - "profiling_runs": 23158.188, + "compilation_time": 16215.021, + "data": 10669.496, + "framework": 48014.751000000004, + "kernel_overhead": 9367.319, + "profiling_overhead": 4977.19, + "profiling_runs": 23000.746, "runtimes": [ - 94.048 + 90.656 ], - "search_algorithm": 23.224, - "validation": 11778.9 + "search_algorithm": 17.65, + "validation": 10899.337 }, - "timestamp": "2026-03-04 11:13:39 UTC" + "timestamp": "2026-03-05 09:04:43 UTC" }, { "compilation_data": { @@ -276,49 +276,49 @@ { "name": "time", "unit": "", - "value": 55.264 + "value": 62.816 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.87621124031008 + "value": 49.741541353383454 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3628.0 + "value": 7328.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 45084.0 + "value": 43480.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 40.13202292869339 + "value": 39.85658224775467 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 262755.0 + "value": 262729.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 131976.0 + "value": 131205.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.886138515029279 + "value": 6.859707591566043 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -330,7 +330,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.9827921439546756 + "value": 2.969729925684249 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -360,13 +360,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.674577523171475 + "value": 29.580783726337412 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 82.36401843126689 + "value": 81.44435381547943 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -426,7 +426,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.14842874269299 + "value": 8.204882396732012 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -444,7 +444,7 @@ "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.242949532492342 + "value": 7.292660047159261 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -456,7 +456,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.684307375544268 + "value": 13.778449397267256 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -475,19 +475,19 @@ "time" ], "times": { - "compilation": 13826.961, - "data": 5674.116, - "framework": 41277.788, - "kernel_overhead": 8908.093, - "profiling_overhead": 4539.167, - "profiling_runs": 22156.412, + "compilation_time": 14667.761, + "data": 6443.632, + "framework": 43979.096999999994, + "kernel_overhead": 9226.711, + "profiling_overhead": 5352.126, + "profiling_runs": 22956.628, "runtimes": [ - 55.264 + 62.816 ], - "search_algorithm": 12.698, - "validation": 11471.168 + "search_algorithm": 22.325, + "validation": 12768.84 }, - "timestamp": "2026-03-04 11:13:39 UTC" + "timestamp": "2026-03-05 09:04:43 UTC" }, { "compilation_data": { @@ -516,49 +516,49 @@ { "name": "time", "unit": "", - "value": 51.776 + "value": 64.127 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 72.11106115107914 + "value": 83.6156952247191 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9984.0 + "value": 25264.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 41336.0 + "value": 50940.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 74.64229988486942 + "value": 44.82256259553097 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 262484.0 + "value": 262635.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 131563.0 + "value": 131352.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.333441841160818 + "value": 12.684698076468973 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -570,7 +570,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.544200652418143 + "value": 3.3374888981234774 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -600,13 +600,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 62.173972125695464 + "value": 62.81015276503553 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 82.47019619919057 + "value": 49.804775029129694 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -666,7 +666,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 15.123652293351647 + "value": 15.080535694657113 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -684,7 +684,7 @@ "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.445343670644883 + "value": 13.402284805709714 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -696,7 +696,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.455595499002154 + "value": 25.374329295258352 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -715,19 +715,19 @@ "time" ], "times": { - "compilation": 14160.107, - "data": 5192.749, - "framework": 40546.879, - "kernel_overhead": 9063.23, - "profiling_overhead": 4221.417, - "profiling_runs": 22069.483, + "compilation_time": 13879.778, + "data": 6938.962, + "framework": 44236.454, + "kernel_overhead": 8881.219, + "profiling_overhead": 5568.067, + "profiling_runs": 22848.206, "runtimes": [ - 51.776 + 64.127 ], - "search_algorithm": 15.502, - "validation": 10718.272 + "search_algorithm": 21.857, + "validation": 12616.121 }, - "timestamp": "2026-03-04 11:13:39 UTC" + "timestamp": "2026-03-05 09:04:43 UTC" }, { "compilation_data": { @@ -756,49 +756,49 @@ { "name": "time", "unit": "", - "value": 53.632 + "value": 69.728 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 76.65874094202898 + "value": 80.48551502145924 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10196.0 + "value": 10328.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 43968.0 + "value": 37680.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 74.86345335764206 + "value": 68.16769541629762 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 262432.0 + "value": 262508.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 131633.0 + "value": 131398.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.187193515867252 + "value": 13.075452800600143 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -810,7 +810,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.561288220279151 + "value": 5.073953868642073 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -840,13 +840,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 63.05144087367304 + "value": 63.04814826268487 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 84.22369725194156 + "value": 76.6576908307113 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -906,7 +906,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.854526714350053 + "value": 14.897442034979491 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -924,7 +924,7 @@ "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.20599404142389 + "value": 13.237951244441348 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -936,7 +936,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.01725407775569 + "value": 25.077793365066764 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -955,19 +955,19 @@ "time" ], "times": { - "compilation": 13681.31, - "data": 5773.082, - "framework": 41804.396, - "kernel_overhead": 9070.811, - "profiling_overhead": 4826.182, - "profiling_runs": 22134.321, + "compilation_time": 14439.507, + "data": 6548.418, + "framework": 43838.988, + "kernel_overhead": 9133.236, + "profiling_overhead": 5376.813, + "profiling_runs": 22780.521, "runtimes": [ - 53.632 + 69.728 ], - "search_algorithm": 8.022, - "validation": 12624.658 + "search_algorithm": 6.933, + "validation": 14969.414 }, - "timestamp": "2026-03-04 11:13:39 UTC" + "timestamp": "2026-03-05 09:04:43 UTC" } ], "schema_version": "1.0.0" From f88e1d797c6a1dbcd7c2e259971a5179faa7e381 Mon Sep 17 00:00:00 2001 From: Jana Hozzova Date: Fri, 13 Mar 2026 10:52:48 +0100 Subject: [PATCH 3/3] Change parameters' values type in configuration in T4 results schema --- ...oulomb_rtx500ada_full_search_space.t4.json | 13862 ++++++++-------- Source/Output/JsonT4Converters.cpp | 67 +- .../03KernelTuning/FullSearchSpace.t4.json | 202 +- 3 files changed, 7063 insertions(+), 7068 deletions(-) diff --git a/Examples/CoulombSum3d/coulomb_rtx500ada_full_search_space.t4.json b/Examples/CoulombSum3d/coulomb_rtx500ada_full_search_space.t4.json index 5e6bdf2b..cb0d4727 100644 --- a/Examples/CoulombSum3d/coulomb_rtx500ada_full_search_space.t4.json +++ b/Examples/CoulombSum3d/coulomb_rtx500ada_full_search_space.t4.json @@ -5,7 +5,7 @@ "compute_api": "CUDA", "device": "NVIDIA RTX 500 Ada Generation Laptop GPU", "platform": "NVIDIA CUDA", - "timestamp": "2026-03-05 09:00:49 UTC", + "timestamp": "2026-03-13 09:42:10 UTC", "timeunit": "microseconds" }, "results": [ @@ -28,14 +28,14 @@ "registers": 44 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 1 }, "correctness": 1, "invalidity": "correct", @@ -43,49 +43,49 @@ { "name": "time", "unit": "", - "value": 4164.159 + "value": 4625.056 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.08856462977461 + "value": 10.413613167579596 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4716.0 + "value": 684.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870148.0 + "value": 1868208.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.6511836629193575 + "value": 1.61945582588573 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 67640.0 + "value": 65116.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099965.0 + "value": 2099676.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.496709444370346 + "value": 22.494046099611182 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -97,7 +97,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0542384694410099 + "value": 1.0541427733786057 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -127,13 +127,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.34603258600442 + "value": 60.270256988922235 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94811431437259 + "value": 99.94543870312687 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -193,7 +193,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.99411082917537 + "value": 39.991802159177155 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -205,13 +205,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.00419209644674 + "value": 45.00131163005578 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.954238525618663 + "value": 11.953473401733568 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -223,7 +223,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.22274318740528 + "value": 57.2191579633614 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -242,19 +242,19 @@ "time" ], "times": { - "compilation_time": 14958.313, - "data": 79109.886, - "framework": 298309.184, - "kernel_overhead": 59740.158, - "profiling_overhead": 65245.883, - "profiling_runs": 94213.257, + "compilation_time": 16550.897, + "data": 61128.287, + "framework": 269589.194, + "kernel_overhead": 60468.296, + "profiling_overhead": 51204.158, + "profiling_runs": 96788.453, "runtimes": [ - 4164.159 + 4625.056 ], - "search_algorithm": 44.101, - "validation": 21.809 + "search_algorithm": 25.784, + "validation": 13.724 }, - "timestamp": "2026-03-05 08:58:2 UTC" + "timestamp": "2026-03-13 09:39:30 UTC" }, { "compilation_data": { @@ -275,14 +275,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -290,49 +290,49 @@ { "name": "time", "unit": "", - "value": 2065.599 + "value": 2122.56 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 31.19627659574468 + "value": 23.048801316078777 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5716.0 + "value": 6604.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871052.0 + "value": 1871936.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.9417972854982906 + "value": 2.9495507070514804 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 40336.0 + "value": 41726.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099148.0 + "value": 2100138.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.914180329323386 + "value": 19.910985735743168 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -344,7 +344,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9329897641046083 + "value": 0.9329052962190687 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -374,13 +374,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.77744355125036 + "value": 95.3901192598929 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9367232322082 + "value": 99.94629043595558 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -440,7 +440,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.492465935580846 + "value": 50.483527003321306 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -452,13 +452,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.66553631334968 + "value": 79.65069865369911 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.736175792228764 + "value": 10.734176185752418 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -470,7 +470,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.2432473827244 + "value": 76.22919815387539 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -489,19 +489,19 @@ "time" ], "times": { - "compilation_time": 28842.744, - "data": 79997.846, - "framework": 294033.778, - "kernel_overhead": 62610.538, - "profiling_overhead": 65758.4, - "profiling_runs": 85666.994, + "compilation_time": 14436.949, + "data": 63774.84, + "framework": 273382.684, + "kernel_overhead": 66712.869, + "profiling_overhead": 53480.402, + "profiling_runs": 89414.573, "runtimes": [ - 2065.599 + 2122.56 ], - "search_algorithm": 43.138, - "validation": 27.005 + "search_algorithm": 30.007, + "validation": 16.354 }, - "timestamp": "2026-03-05 08:58:3 UTC" + "timestamp": "2026-03-13 09:39:30 UTC" }, { "compilation_data": { @@ -522,14 +522,14 @@ "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -537,49 +537,49 @@ { "name": "time", "unit": "", - "value": 2062.048 + "value": 1828.192 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 34.85724296278626 + "value": 27.00451696165192 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 24.0 + "value": 2480.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870332.0 + "value": 1872368.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3292094087218165 + "value": 3.346337561497289 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 31981.0 + "value": 36972.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098881.0 + "value": 2103303.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.292560514913129 + "value": 11.290646780919694 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -591,7 +591,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5290514380679806 + "value": 0.5289034228289748 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -621,13 +621,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.98345631231332 + "value": 97.11650820752672 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91489036218367 + "value": 99.90403221337345 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -687,7 +687,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.84691858176103 + "value": 45.83808438201746 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -699,13 +699,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.36835761213294 + "value": 90.35289383517835 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.265774795372499 + "value": 6.264702599900061 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -717,7 +717,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.45370302261419 + "value": 66.44250720680364 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -736,19 +736,19 @@ "time" ], "times": { - "compilation_time": 24253.315, - "data": 78566.579, - "framework": 240769.07400000002, - "kernel_overhead": 37803.868, - "profiling_overhead": 64569.858, - "profiling_runs": 59828.769, + "compilation_time": 14301.771, + "data": 64456.265, + "framework": 207942.981, + "kernel_overhead": 34402.855, + "profiling_overhead": 53730.376, + "profiling_runs": 55353.485, "runtimes": [ - 2062.048 + 1828.192 ], - "search_algorithm": 34.974, - "validation": 25.508 + "search_algorithm": 22.356, + "validation": 14.318 }, - "timestamp": "2026-03-05 08:58:3 UTC" + "timestamp": "2026-03-13 09:39:30 UTC" }, { "compilation_data": { @@ -769,14 +769,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -784,49 +784,49 @@ { "name": "time", "unit": "", - "value": 1785.216 + "value": 1786.944 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.2106897790329 + "value": 27.459425162486657 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 424.0 + "value": 4780.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870620.0 + "value": 1871740.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4349717901443535 + "value": 3.43097102667654 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 32211.0 + "value": 36703.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099175.0 + "value": 2100249.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.798133949028227 + "value": 5.79713181637969 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -838,7 +838,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.27158949142281796 + "value": 0.2715836989664437 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -868,13 +868,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.54738722612807 + "value": 98.4168065338735 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91037980534982 + "value": 99.92458230027061 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -934,7 +934,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.63220670537569 + "value": 42.625018714518895 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -946,13 +946,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.78570113161689 + "value": 92.77053465047281 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3979138598004237 + "value": 3.397358446672589 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -964,7 +964,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.07610249989111 + "value": 60.066464073226456 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -983,19 +983,19 @@ "time" ], "times": { - "compilation_time": 23739.478, - "data": 77263.467, - "framework": 226799.34200000003, - "kernel_overhead": 32201.116, - "profiling_overhead": 64149.56, - "profiling_runs": 53185.199, + "compilation_time": 13656.641, + "data": 64033.476, + "framework": 195497.96500000003, + "kernel_overhead": 28639.999, + "profiling_overhead": 53738.133, + "profiling_runs": 49086.357, "runtimes": [ - 1785.216 + 1786.944 ], - "search_algorithm": 32.723, - "validation": 28.135 + "search_algorithm": 23.138, + "validation": 15.619 }, - "timestamp": "2026-03-05 08:58:3 UTC" + "timestamp": "2026-03-13 09:39:30 UTC" }, { "compilation_data": { @@ -1016,14 +1016,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -1031,49 +1031,49 @@ { "name": "time", "unit": "", - "value": 1749.376 + "value": 1750.304 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 37.06801098400486 + "value": 27.832814578977384 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3308.0 + "value": 3456.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871420.0 + "value": 1870828.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5387505637651557 + "value": 3.4932530263408057 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 33523.0 + "value": 33924.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100375.0 + "value": 2100111.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.9960790924695573 + "value": 2.9954802664507256 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -1085,7 +1085,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.14035228789763055 + "value": 0.14030270582811138 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -1115,13 +1115,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.3774365188951 + "value": 98.39189211473479 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92256523959945 + "value": 99.90434827634792 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -1181,7 +1181,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.53929291667869 + "value": 39.532478914694174 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -1193,13 +1193,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.88807924252993 + "value": 95.87168342968546 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9430445744946252 + "value": 1.9427123351230209 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -1211,7 +1211,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.362620819453305 + "value": 58.35282431031613 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -1230,19 +1230,19 @@ "time" ], "times": { - "compilation_time": 23699.458, - "data": 77649.478, - "framework": 232715.30500000002, - "kernel_overhead": 34816.117, - "profiling_overhead": 64360.759, - "profiling_runs": 55888.951, + "compilation_time": 13933.523, + "data": 65401.678, + "framework": 205428.967, + "kernel_overhead": 32372.173, + "profiling_overhead": 54710.918, + "profiling_runs": 52944.198, "runtimes": [ - 1749.376 + 1750.304 ], - "search_algorithm": 34.358, - "validation": 28.966 + "search_algorithm": 17.561, + "validation": 12.416 }, - "timestamp": "2026-03-05 08:58:3 UTC" + "timestamp": "2026-03-13 09:39:31 UTC" }, { "compilation_data": { @@ -1263,14 +1263,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -1278,49 +1278,49 @@ { "name": "time", "unit": "", - "value": 1761.088 + "value": 1741.76 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.907264989644894 + "value": 28.298179460782407 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3820.0 + "value": 240.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1866652.0 + "value": 1864524.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5314718538865773 + "value": 3.518859581819769 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 32563.0 + "value": 28772.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101849.0 + "value": 2098991.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4946475753164037 + "value": 1.4943897250673268 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -1332,7 +1332,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07002164243147418 + "value": 0.07000424993249102 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -1362,13 +1362,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.7369421065671 + "value": 81.74260723095009 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93557808028555 + "value": 99.9257280145751 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -1428,7 +1428,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.940535645157915 + "value": 37.934593085022215 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -1440,13 +1440,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.6645113811566 + "value": 95.65017719349177 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1561018831463015 + "value": 1.1559286550483014 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -1458,7 +1458,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 53.526208355551276 + "value": 53.5183388160655 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -1477,19 +1477,19 @@ "time" ], "times": { - "compilation_time": 23685.291, - "data": 78190.631, - "framework": 214281.375, - "kernel_overhead": 25009.061, - "profiling_overhead": 65057.868, - "profiling_runs": 46023.815, + "compilation_time": 13579.687, + "data": 64021.437, + "framework": 182811.388, + "kernel_overhead": 22194.174, + "profiling_overhead": 53988.269, + "profiling_runs": 42607.508, "runtimes": [ - 1761.088 + 1741.76 ], - "search_algorithm": 34.246, - "validation": 26.896 + "search_algorithm": 20.045, + "validation": 13.461 }, - "timestamp": "2026-03-05 08:58:3 UTC" + "timestamp": "2026-03-13 09:39:31 UTC" }, { "compilation_data": { @@ -1510,14 +1510,14 @@ "registers": 19 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -1525,49 +1525,49 @@ { "name": "time", "unit": "", - "value": 5036.256 + "value": 5072.768 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.502511456516535 + "value": 9.588635739415896 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 932.0 + "value": 6520.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870868.0 + "value": 1870312.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.2087752958969935 + "value": 1.2065750913640265 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 86644.0 + "value": 92934.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099613.0 + "value": 2101293.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.955361178183637 + "value": 7.954841736224723 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -1579,7 +1579,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.37280262557656435 + "value": 0.3727848550085191 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -1609,13 +1609,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.65932195477647 + "value": 97.57424503773251 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96304194450488 + "value": 99.96876785872067 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -1675,7 +1675,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.26869727517785 + "value": 36.26513015383146 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -1687,13 +1687,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.824252340707808 + "value": 31.820912646454406 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.288815256853201 + "value": 4.288365180869833 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -1705,7 +1705,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.8132707652535 + "value": 84.80443029046585 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -1724,19 +1724,19 @@ "time" ], "times": { - "compilation_time": 38163.739, - "data": 78482.367, - "framework": 2055944.9449999998, - "kernel_overhead": 937215.515, - "profiling_overhead": 65372.435, - "profiling_runs": 974874.628, + "compilation_time": 12964.069, + "data": 65241.53, + "framework": 2033676.3309999998, + "kernel_overhead": 938504.769, + "profiling_overhead": 54388.263, + "profiling_runs": 975541.769, "runtimes": [ - 5036.256 + 5072.768 ], - "search_algorithm": 51.765, - "validation": 25.886 + "search_algorithm": 31.782, + "validation": 17.514 }, - "timestamp": "2026-03-05 08:58:4 UTC" + "timestamp": "2026-03-13 09:39:32 UTC" }, { "compilation_data": { @@ -1757,14 +1757,14 @@ "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -1772,49 +1772,49 @@ { "name": "time", "unit": "", - "value": 5652.48 + "value": 5694.816 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.394571769264015 + "value": 8.615016645062456 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 13952.0 + "value": 412.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1875804.0 + "value": 1871128.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1145089532655934 + "value": 1.1010689399689677 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 110014.0 + "value": 95824.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108825.0 + "value": 2099079.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.6247277006174636 + "value": 3.6246960469967497 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -1826,7 +1826,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.16985528331244065 + "value": 0.16986943637160654 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -1856,13 +1856,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.6001940199266 + "value": 98.73392674578345 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95914913395472 + "value": 99.97171899129378 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -1922,7 +1922,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.82916869106652 + "value": 33.82778398688032 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -1934,13 +1934,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.00048196669724 + "value": 28.999251755001087 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.010775605112797 + "value": 2.0106903072315205 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -1952,7 +1952,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.01932998461622 + "value": 85.01577820318393 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -1971,19 +1971,19 @@ "time" ], "times": { - "compilation_time": 24541.223, - "data": 79995.385, - "framework": 2633698.904, - "kernel_overhead": 1223463.404, - "profiling_overhead": 66650.71, - "profiling_runs": 1263589.405, + "compilation_time": 14114.672, + "data": 64248.526, + "framework": 2610953.772, + "kernel_overhead": 1226808.237, + "profiling_overhead": 53472.917, + "profiling_runs": 1266424.092, "runtimes": [ - 5652.48 + 5694.816 ], - "search_algorithm": 41.204, - "validation": 24.811 + "search_algorithm": 21.164, + "validation": 11.626 }, - "timestamp": "2026-03-05 08:58:6 UTC" + "timestamp": "2026-03-13 09:39:33 UTC" }, { "compilation_data": { @@ -2004,14 +2004,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -2019,49 +2019,49 @@ { "name": "time", "unit": "", - "value": 8013.44 + "value": 8063.36 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.189313221389154 + "value": 6.283676341194658 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16332.0 + "value": 19524.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1932604.0 + "value": 1936688.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.77089332021168 + "value": 48.767527188637146 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 968608.0 + "value": 973615.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138418932.0 + "value": 138421553.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.268815657925955 + "value": 1.2712572009462428 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -2073,7 +2073,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05929962738909261 + "value": 0.059242074600756006 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -2103,13 +2103,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.91210565187163 + "value": 91.06120526459911 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.38980851415344 + "value": 99.02011953455342 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -2169,7 +2169,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.650280349933606 + "value": 10.679570754287457 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -2181,13 +2181,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.365206236674894 + "value": 20.42140010748195 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.027838728746318 + "value": 11.058267929295647 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -2199,7 +2199,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.98622427975115 + "value": 30.06902552361049 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -2218,19 +2218,19 @@ "time" ], "times": { - "compilation_time": 23858.124, - "data": 78159.021, - "framework": 918072.5819999999, - "kernel_overhead": 361327.816, - "profiling_overhead": 65112.2, - "profiling_runs": 413473.545, + "compilation_time": 14084.45, + "data": 60331.688, + "framework": 877465.015, + "kernel_overhead": 357516.736, + "profiling_overhead": 50138.773, + "profiling_runs": 409477.818, "runtimes": [ - 8013.44 + 8063.36 ], - "search_algorithm": 40.389, - "validation": 24.587 + "search_algorithm": 21.502, + "validation": 15.481 }, - "timestamp": "2026-03-05 08:58:6 UTC" + "timestamp": "2026-03-13 09:39:34 UTC" }, { "compilation_data": { @@ -2251,14 +2251,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -2266,49 +2266,49 @@ { "name": "time", "unit": "", - "value": 8059.2 + "value": 8382.56 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.554184563722444 + "value": 6.413510059969771 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 19976.0 + "value": 196.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2024444.0 + "value": 2020308.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.24017876637235 + "value": 54.20263848269499 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 17152204.0 + "value": 17168434.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138420548.0 + "value": 138413785.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6326617969800049 + "value": 0.6356024314961998 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -2320,7 +2320,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029527060969872645 + "value": 0.02980169720762608 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -2350,13 +2350,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.25322446437328 + "value": 88.24275091013848 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.39824811511478 + "value": 100.4407949437812 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -2416,7 +2416,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.164085373791956 + "value": 9.153129560773474 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -2428,13 +2428,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.27917057997045 + "value": 20.25534077575423 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.649535136112412 + "value": 10.63702099820492 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -2446,7 +2446,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.72815965897177 + "value": 27.695636552078074 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -2465,19 +2465,19 @@ "time" ], "times": { - "compilation_time": 23165.279, - "data": 77944.504, - "framework": 882047.1429999999, - "kernel_overhead": 343819.281, - "profiling_overhead": 64384.98, - "profiling_runs": 395898.378, + "compilation_time": 13809.213, + "data": 62159.044, + "framework": 853320.324, + "kernel_overhead": 343405.938, + "profiling_overhead": 52145.757, + "profiling_runs": 395609.585, "runtimes": [ - 8059.2 + 8382.56 ], - "search_algorithm": 42.603, - "validation": 29.839 + "search_algorithm": 20.819, + "validation": 15.164 }, - "timestamp": "2026-03-05 08:58:7 UTC" + "timestamp": "2026-03-13 09:39:34 UTC" }, { "compilation_data": { @@ -2498,14 +2498,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -2513,49 +2513,49 @@ { "name": "time", "unit": "", - "value": 9279.392 + "value": 9547.52 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.287981037043913 + "value": 6.32302599031768 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 844.0 + "value": 1852.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2284464.0 + "value": 2288520.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 75.05927508481193 + "value": 75.23392989899871 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 110113746.0 + "value": 111496879.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138417394.0 + "value": 138419087.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.277256639031899 + "value": 0.2773444689236649 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -2567,7 +2567,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01279526175463149 + "value": 0.01282342037438627 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -2597,13 +2597,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.05232653461279 + "value": 91.88202686374977 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.90666320961353 + "value": 98.0279153563047 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -2663,7 +2663,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.3551326086856275 + "value": 7.4387690247722835 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -2675,13 +2675,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.662912502972294 + "value": 17.86046680770238 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.131156549082966 + "value": 9.233285758132274 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -2693,7 +2693,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.2226782307264 + "value": 23.4824681896418 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -2712,19 +2712,19 @@ "time" ], "times": { - "compilation_time": 24339.71, - "data": 79323.557, - "framework": 874243.786, - "kernel_overhead": 335047.331, - "profiling_overhead": 65887.142, - "profiling_runs": 393985.756, + "compilation_time": 13067.866, + "data": 61728.469, + "framework": 832414.729, + "kernel_overhead": 330272.982, + "profiling_overhead": 51607.952, + "profiling_runs": 388805.326, "runtimes": [ - 9279.392 + 9547.52 ], - "search_algorithm": 43.566, - "validation": 30.712 + "search_algorithm": 29.393, + "validation": 14.131 }, - "timestamp": "2026-03-05 08:58:7 UTC" + "timestamp": "2026-03-13 09:39:35 UTC" }, { "compilation_data": { @@ -2745,14 +2745,14 @@ "registers": 23 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -2760,49 +2760,49 @@ { "name": "time", "unit": "", - "value": 3366.56 + "value": 3432.96 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.81636007159966 + "value": 14.25429101964633 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 152.0 + "value": 15540.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870140.0 + "value": 1880860.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.807543781439878 + "value": 1.8256710163815433 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 58473.0 + "value": 78447.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098982.0 + "value": 2134319.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.0603114780106395 + "value": 6.059491718883135 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -2814,7 +2814,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2839598393713506 + "value": 0.2839478151148024 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -2844,13 +2844,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.32896806220486 + "value": 98.31439879116267 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94940131107272 + "value": 99.95440736089881 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -2910,7 +2910,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.3840702965069 + "value": 41.38078969006535 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -2922,13 +2922,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.48701304561159 + "value": 48.48253157857601 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.36189250609221 + "value": 3.3615817793739224 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -2940,7 +2940,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.59917436439628 + "value": 87.59117074591536 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -2959,19 +2959,19 @@ "time" ], "times": { - "compilation_time": 28215.263, - "data": 79211.125, - "framework": 1355583.8590000002, - "kernel_overhead": 591129.496, - "profiling_overhead": 65506.276, - "profiling_runs": 619736.962, + "compilation_time": 13061.264, + "data": 61445.276, + "framework": 1333557.745, + "kernel_overhead": 595954.893, + "profiling_overhead": 51376.48, + "profiling_runs": 624781.096, "runtimes": [ - 3366.56 + 3432.96 ], - "search_algorithm": 43.941, - "validation": 24.311 + "search_algorithm": 20.507, + "validation": 14.662 }, - "timestamp": "2026-03-05 08:58:8 UTC" + "timestamp": "2026-03-13 09:39:35 UTC" }, { "compilation_data": { @@ -2992,14 +2992,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -3007,49 +3007,49 @@ { "name": "time", "unit": "", - "value": 4003.552 + "value": 4067.52 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.807840367486897 + "value": 12.065163768618515 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6500.0 + "value": 484.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869964.0 + "value": 1869960.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.530953336101577 + "value": 1.5249789663890714 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 74673.0 + "value": 70175.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100638.0 + "value": 2099095.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.539996258228092 + "value": 2.5397549873578837 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -3061,7 +3061,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11899256640229201 + "value": 0.1190199681954779 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -3091,13 +3091,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.95186650920881 + "value": 98.94555506384253 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93525412485533 + "value": 99.96648658778236 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -3157,7 +3157,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.969446126094475 + "value": 40.96665806808 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -3169,13 +3169,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.64244363779581 + "value": 40.6391020271737 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4883707386888112 + "value": 1.4882483652529432 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -3187,7 +3187,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.27510468850242 + "value": 90.2677601097571 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -3206,19 +3206,19 @@ "time" ], "times": { - "compilation_time": 23089.798, - "data": 79263.028, - "framework": 1922833.412, - "kernel_overhead": 873048.944, - "profiling_overhead": 65324.961, - "profiling_runs": 905196.479, + "compilation_time": 13329.261, + "data": 60877.281, + "framework": 1894399.858, + "kernel_overhead": 875950.306, + "profiling_overhead": 49957.203, + "profiling_runs": 907615.068, "runtimes": [ - 4003.552 + 4067.52 ], - "search_algorithm": 39.768, - "validation": 26.124 + "search_algorithm": 24.168, + "validation": 15.35 }, - "timestamp": "2026-03-05 08:58:9 UTC" + "timestamp": "2026-03-13 09:39:36 UTC" }, { "compilation_data": { @@ -3239,14 +3239,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -3254,49 +3254,49 @@ { "name": "time", "unit": "", - "value": 8081.152 + "value": 8092.928 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.473052487668882 + "value": 6.47219830393929 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4356.0 + "value": 15272.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2018548.0 + "value": 2019612.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 56.79576867244131 + "value": 56.83382157061392 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 24425825.0 + "value": 24467384.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138414879.0 + "value": 138417132.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6333221065974899 + "value": 0.6332243162338091 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -3308,7 +3308,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029555182240722004 + "value": 0.029514130826861145 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -3338,13 +3338,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.36712736096173 + "value": 90.41484223138787 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.55225886207498 + "value": 99.97061909768424 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -3404,7 +3404,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.000554932119748 + "value": 8.950291984088693 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -3416,13 +3416,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.267081805700595 + "value": 20.154234807178394 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.576416307379046 + "value": 5.545366852463392 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -3434,7 +3434,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.111467635614066 + "value": 19.99954520813405 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -3453,19 +3453,19 @@ "time" ], "times": { - "compilation_time": 23188.755, - "data": 78986.207, - "framework": 577515.828, - "kernel_overhead": 190363.108, - "profiling_overhead": 65587.288, - "profiling_runs": 242579.225, + "compilation_time": 13141.641, + "data": 65367.029, + "framework": 552439.6240000001, + "kernel_overhead": 189927.064, + "profiling_overhead": 55300.722, + "profiling_runs": 241844.809, "runtimes": [ - 8081.152 + 8092.928 ], - "search_algorithm": 44.989, - "validation": 32.889 + "search_algorithm": 28.953, + "validation": 15.538 }, - "timestamp": "2026-03-05 08:58:9 UTC" + "timestamp": "2026-03-13 09:39:37 UTC" }, { "compilation_data": { @@ -3486,14 +3486,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -3501,49 +3501,49 @@ { "name": "time", "unit": "", - "value": 9321.728 + "value": 9518.048 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.343425334472226 + "value": 6.280588496260721 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 28472.0 + "value": 14060.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2272060.0 + "value": 2269156.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.93752652102069 + "value": 81.65403883653019 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 133018806.0 + "value": 132976851.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138426928.0 + "value": 138424680.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.2752852369893251 + "value": 0.27393017661975316 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -3555,7 +3555,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012788896839625048 + "value": 0.012718086514203912 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -3585,13 +3585,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.88763212485345 + "value": 98.99613878402005 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.38350626080404 + "value": 98.07449821000799 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -3651,7 +3651,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.247180761909658 + "value": 7.303431874321705 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -3663,13 +3663,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.569421535480583 + "value": 17.705344173035794 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.6904693479121145 + "value": 4.7267563118199805 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -3681,7 +3681,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.51125274818198 + "value": 16.639042150996122 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -3700,19 +3700,19 @@ "time" ], "times": { - "compilation_time": 23421.374, - "data": 76565.916, - "framework": 558501.9369999999, - "kernel_overhead": 179959.425, - "profiling_overhead": 63211.784, - "profiling_runs": 238764.812, + "compilation_time": 13326.167, + "data": 63050.243, + "framework": 525875.102, + "kernel_overhead": 175836.173, + "profiling_overhead": 52441.992, + "profiling_runs": 234546.694, "runtimes": [ - 9321.728 + 9518.048 ], - "search_algorithm": 42.084, - "validation": 27.281 + "search_algorithm": 21.905, + "validation": 13.095 }, - "timestamp": "2026-03-05 08:58:10 UTC" + "timestamp": "2026-03-13 09:39:37 UTC" }, { "compilation_data": { @@ -3733,14 +3733,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -3748,49 +3748,49 @@ { "name": "time", "unit": "", - "value": 2560.96 + "value": 2550.816 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 25.25174237736757 + "value": 19.09064788842138 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7472.0 + "value": 8884.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1873100.0 + "value": 1872788.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.423103083944468 + "value": 2.4097307002662167 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 52242.0 + "value": 54866.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100326.0 + "value": 2100911.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.067482104424384 + "value": 4.066936693371175 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -3802,7 +3802,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.19059009488325 + "value": 0.19053323644379205 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -3832,13 +3832,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.68732069440172 + "value": 98.70618661578031 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95141337840718 + "value": 99.93617420526064 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -3898,7 +3898,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.25513562540067 + "value": 45.24846643398096 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -3910,13 +3910,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.08637565786536 + "value": 65.07688054234542 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.3835342648144056 + "value": 2.383186543298782 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -3928,7 +3928,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.7031973689859 + "value": 91.68994484156153 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -3947,19 +3947,19 @@ "time" ], "times": { - "compilation_time": 23739.516, - "data": 78451.291, - "framework": 1085704.6770000001, - "kernel_overhead": 458857.706, - "profiling_overhead": 64762.605, - "profiling_runs": 483633.075, + "compilation_time": 13802.381, + "data": 61211.306, + "framework": 1059974.401, + "kernel_overhead": 461636.205, + "profiling_overhead": 51025.393, + "profiling_runs": 486101.497, "runtimes": [ - 2560.96 + 2550.816 ], - "search_algorithm": 46.072, - "validation": 26.249 + "search_algorithm": 24.389, + "validation": 15.484 }, - "timestamp": "2026-03-05 08:58:10 UTC" + "timestamp": "2026-03-13 09:39:38 UTC" }, { "compilation_data": { @@ -3980,14 +3980,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -3995,49 +3995,49 @@ { "name": "time", "unit": "", - "value": 3200.48 + "value": 3267.616 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.788279958817633 + "value": 15.143757853861525 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 440.0 + "value": 8180.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869296.0 + "value": 1873736.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8990258327535976 + "value": 1.9125955228706149 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 55519.0 + "value": 62697.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099159.0 + "value": 2103565.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5936383952174313 + "value": 1.5934847442910576 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -4049,7 +4049,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07467841425540916 + "value": 0.0746722923682766 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -4079,13 +4079,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.69534744623671 + "value": 98.69823358023744 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96338152799524 + "value": 99.96528145553604 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -4145,7 +4145,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.37721957899672 + "value": 45.37304425318255 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -4157,13 +4157,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.999139237216916 + "value": 50.993989294257226 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0334298234104013 + "value": 1.0333254666560912 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -4175,7 +4175,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.17578619280563 + "value": 95.16627270211275 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -4194,19 +4194,19 @@ "time" ], "times": { - "compilation_time": 23327.97, - "data": 77236.169, - "framework": 1822333.866, - "kernel_overhead": 826526.159, - "profiling_overhead": 64050.318, - "profiling_runs": 854521.22, + "compilation_time": 13765.721, + "data": 64645.271, + "framework": 1801194.245, + "kernel_overhead": 827456.625, + "profiling_overhead": 53769.655, + "profiling_runs": 855322.694, "runtimes": [ - 3200.48 + 3267.616 ], - "search_algorithm": 55.23, - "validation": 26.358 + "search_algorithm": 23.666, + "validation": 16.678 }, - "timestamp": "2026-03-05 08:58:11 UTC" + "timestamp": "2026-03-13 09:39:39 UTC" }, { "compilation_data": { @@ -4227,14 +4227,14 @@ "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -4242,49 +4242,49 @@ { "name": "time", "unit": "", - "value": 5331.424 + "value": 5413.408 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.821404398294506 + "value": 9.05261934283943 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 488.0 + "value": 924.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1866212.0 + "value": 1868116.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1540201302036333 + "value": 1.1585041074807547 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 88897.0 + "value": 91145.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099167.0 + "value": 2103229.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.47675879314446995 + "value": 0.47671469553645895 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -4296,7 +4296,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02234292557845685 + "value": 0.022342041602234203 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -4326,13 +4326,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.90297616084939 + "value": 73.90356864956635 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97325231065088 + "value": 99.97779295974777 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -4392,7 +4392,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.99366709684244 + "value": 42.99004423503827 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -4404,13 +4404,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.513702766878087 + "value": 30.51110974867372 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.36875690599620736 + "value": 0.36872556947249735 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -4422,7 +4422,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.9481274731761 + "value": 96.93991939811305 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -4441,19 +4441,19 @@ "time" ], "times": { - "compilation_time": 23384.551, - "data": 78405.678, - "framework": 3023664.119, - "kernel_overhead": 1420981.55, - "profiling_overhead": 64497.095, - "profiling_runs": 1459779.796, + "compilation_time": 13498.083, + "data": 57824.442, + "framework": 2998145.2290000003, + "kernel_overhead": 1427262.613, + "profiling_overhead": 47535.89, + "profiling_runs": 1465522.284, "runtimes": [ - 5331.424 + 5413.408 ], - "search_algorithm": 60.413, - "validation": 33.783 + "search_algorithm": 26.898, + "validation": 14.216 }, - "timestamp": "2026-03-05 08:58:13 UTC" + "timestamp": "2026-03-13 09:39:40 UTC" }, { "compilation_data": { @@ -4474,14 +4474,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -4489,49 +4489,49 @@ { "name": "time", "unit": "", - "value": 3221.088 + "value": 3242.944 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.75611216832672 + "value": 15.070156728064232 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5164.0 + "value": 6516.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868512.0 + "value": 1872272.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8967892115372322 + "value": 1.9070522582035985 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 59727.0 + "value": 65532.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100313.0 + "value": 2107809.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5876613691223351 + "value": 1.5874798115324933 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -4543,7 +4543,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07440115786801932 + "value": 0.07439211777734903 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -4573,13 +4573,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.74046242952453 + "value": 98.74010098233705 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96540695065934 + "value": 99.96617415286794 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -4639,7 +4639,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.032733759484216 + "value": 42.02712046325702 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -4651,13 +4651,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.80876673964882 + "value": 50.80220334498012 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0295721775856572 + "value": 1.0294391791097046 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -4669,7 +4669,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.26329991335997 + "value": 89.25186800331588 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -4688,19 +4688,19 @@ "time" ], "times": { - "compilation_time": 23518.816, - "data": 78909.886, - "framework": 1650061.5929999999, - "kernel_overhead": 738690.668, - "profiling_overhead": 65527.093, - "profiling_runs": 766933.946, + "compilation_time": 13437.305, + "data": 57434.97, + "framework": 1620592.3730000001, + "kernel_overhead": 744021.449, + "profiling_overhead": 47456.893, + "profiling_runs": 771679.061, "runtimes": [ - 3221.088 + 3242.944 ], - "search_algorithm": 49.535, - "validation": 26.401 + "search_algorithm": 24.538, + "validation": 13.322 }, - "timestamp": "2026-03-05 08:58:14 UTC" + "timestamp": "2026-03-13 09:39:41 UTC" }, { "compilation_data": { @@ -4721,14 +4721,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -4736,49 +4736,49 @@ { "name": "time", "unit": "", - "value": 6089.536 + "value": 6152.32 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.410555028801188 + "value": 7.960563804610989 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 184.0 + "value": 808.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869012.0 + "value": 1867828.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0215993051422665 + "value": 1.021891986368899 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 101019.0 + "value": 101230.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099900.0 + "value": 2099637.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.41934615503483813 + "value": 0.4193237777043955 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -4790,7 +4790,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019651603049772703 + "value": 0.01965114798494653 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -4820,13 +4820,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.00647950129411 + "value": 82.00754621887451 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96643101028718 + "value": 99.9707325752225 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -4886,7 +4886,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.84847894496199 + "value": 42.84567406055009 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -4898,13 +4898,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.83999861366321 + "value": 26.838222238618968 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.32436033480867404 + "value": 0.3243388673856541 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -4916,7 +4916,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.65866552637469 + "value": 93.65250742515222 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -4935,19 +4935,19 @@ "time" ], "times": { - "compilation_time": 23092.561, - "data": 78655.257, - "framework": 3501133.285, - "kernel_overhead": 1657649.455, - "profiling_overhead": 64852.55, - "profiling_runs": 1699976.023, + "compilation_time": 13696.832, + "data": 57523.789, + "framework": 3474573.912, + "kernel_overhead": 1663818.882, + "profiling_overhead": 47568.036, + "profiling_runs": 1705663.205, "runtimes": [ - 6089.536 + 6152.32 ], - "search_algorithm": 43.867, - "validation": 25.126 + "search_algorithm": 26.028, + "validation": 15.599 }, - "timestamp": "2026-03-05 08:58:16 UTC" + "timestamp": "2026-03-13 09:39:43 UTC" }, { "compilation_data": { @@ -4968,14 +4968,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 16, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -4983,49 +4983,49 @@ { "name": "time", "unit": "", - "value": 5761.952 + "value": 5820.832 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.98030276336702 + "value": 8.36579919043196 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8104.0 + "value": 508.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1866636.0 + "value": 1867156.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0758850751540154 + "value": 1.0720213682102864 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 103024.0 + "value": 96518.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102625.0 + "value": 2099287.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4409082544539691 + "value": 0.4408757108742221 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -5037,7 +5037,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02066399010146443 + "value": 0.02066238619418991 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -5067,13 +5067,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.02083588741642 + "value": 82.01736147160477 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97706337878503 + "value": 99.97510000019841 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -5133,7 +5133,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.94881795781012 + "value": 43.94594429436074 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -5145,13 +5145,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.219707132532395 + "value": 28.21807091673623 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.34103405836434414 + "value": 0.3410142847603622 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -5163,7 +5163,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.50478361515717 + "value": 94.49934478493837 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -5182,19 +5182,19 @@ "time" ], "times": { - "compilation_time": 23865.401, - "data": 76549.517, - "framework": 3401054.5, - "kernel_overhead": 1610031.706, - "profiling_overhead": 63404.684, - "profiling_runs": 1651068.593, + "compilation_time": 13021.254, + "data": 57564.108, + "framework": 3381161.4390000002, + "kernel_overhead": 1617909.261, + "profiling_overhead": 47570.282, + "profiling_runs": 1658117.788, "runtimes": [ - 5761.952 + 5820.832 ], - "search_algorithm": 41.243, - "validation": 24.726 + "search_algorithm": 23.105, + "validation": 16.408 }, - "timestamp": "2026-03-05 08:58:17 UTC" + "timestamp": "2026-03-13 09:39:45 UTC" }, { "compilation_data": { @@ -5215,14 +5215,14 @@ "registers": 44 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 1 }, "correctness": 1, "invalidity": "correct", @@ -5230,49 +5230,49 @@ { "name": "time", "unit": "", - "value": 3625.856 + "value": 3718.144 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.483302040230573 + "value": 13.274145030646006 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 468.0 + "value": 7856.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870788.0 + "value": 1871644.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.6840275896817833 + "value": 1.6937906639914093 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 63201.0 + "value": 70002.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100022.0 + "value": 2100398.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.51081365876383 + "value": 22.50579957544309 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -5284,7 +5284,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0548462876259923 + "value": 1.054723321279792 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -5314,13 +5314,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 70.60733095398739 + "value": 70.29932076541618 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96605907945067 + "value": 99.96244008680353 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -5380,7 +5380,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.0111206897065 + "value": 40.00648153038042 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -5392,13 +5392,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.022055838910966 + "value": 45.01843726056193 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.958983582210724 + "value": 11.958022397336764 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -5410,7 +5410,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.24553505901626 + "value": 57.240932863416795 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -5429,19 +5429,19 @@ "time" ], "times": { - "compilation_time": 23988.888, - "data": 77465.072, - "framework": 283929.02999999997, - "kernel_overhead": 55881.283, - "profiling_overhead": 64463.299, - "profiling_runs": 86119.376, + "compilation_time": 13457.403, + "data": 58061.018, + "framework": 237839.14899999998, + "kernel_overhead": 50812.939, + "profiling_overhead": 48083.597, + "profiling_runs": 80881.595, "runtimes": [ - 3625.856 + 3718.144 ], - "search_algorithm": 45.473, - "validation": 27.765 + "search_algorithm": 37.562, + "validation": 19.367 }, - "timestamp": "2026-03-05 08:58:18 UTC" + "timestamp": "2026-03-13 09:39:45 UTC" }, { "compilation_data": { @@ -5462,14 +5462,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -5477,49 +5477,49 @@ { "name": "time", "unit": "", - "value": 2123.552 + "value": 2139.776 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 30.661987662066082 + "value": 23.263861287758345 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5648.0 + "value": 2652.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871160.0 + "value": 1870368.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.952537824835645 + "value": 2.9347261086783023 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 41159.0 + "value": 38937.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100134.0 + "value": 2099440.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.91381513375865 + "value": 19.91093989344764 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -5531,7 +5531,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9329443407934526 + "value": 0.9329016663906669 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -5561,13 +5561,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.72445583414333 + "value": 94.7530921155861 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9347416328595 + "value": 99.93916456977948 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -5627,7 +5627,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.48829933666026 + "value": 50.485067492657066 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -5639,13 +5639,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.66323734227547 + "value": 79.65606797698744 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.735865969955091 + "value": 10.734899785961199 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -5657,7 +5657,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.24121721979165 + "value": 76.23439380528096 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -5676,19 +5676,19 @@ "time" ], "times": { - "compilation_time": 24748.861, - "data": 76539.563, - "framework": 288926.78500000003, - "kernel_overhead": 63276.932, - "profiling_overhead": 63262.877, - "profiling_runs": 85847.413, + "compilation_time": 13768.848, + "data": 57583.361, + "framework": 247816.405, + "kernel_overhead": 60061.414, + "profiling_overhead": 47990.684, + "profiling_runs": 82180.946, "runtimes": [ - 2123.552 + 2139.776 ], - "search_algorithm": 39.52, - "validation": 24.318 + "search_algorithm": 29.641, + "validation": 15.903 }, - "timestamp": "2026-03-05 08:58:18 UTC" + "timestamp": "2026-03-13 09:39:45 UTC" }, { "compilation_data": { @@ -5709,14 +5709,14 @@ "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -5724,49 +5724,49 @@ { "name": "time", "unit": "", - "value": 1842.464 + "value": 1841.952 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.07673051155668 + "value": 26.397338488715842 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3816.0 + "value": 3056.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872660.0 + "value": 1871400.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3546267506444494 + "value": 3.3236052548179416 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 37298.0 + "value": 36078.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103088.0 + "value": 2099760.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.29249043505999 + "value": 11.290473997700856 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -5778,7 +5778,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5290118643438448 + "value": 0.5288694479440943 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -5808,13 +5808,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.85549815869105 + "value": 95.82812139003512 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90820026508712 + "value": 99.89685347049594 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -5874,7 +5874,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.84379072782065 + "value": 45.839373795048495 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -5886,13 +5886,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.36764877670687 + "value": 90.35358236695286 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.265725647603698 + "value": 6.264750339896145 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -5904,7 +5904,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.45337668791642 + "value": 66.44307378033095 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -5923,19 +5923,19 @@ "time" ], "times": { - "compilation_time": 23609.302, - "data": 77391.488, - "framework": 238691.445, - "kernel_overhead": 37700.002, - "profiling_overhead": 64229.259, - "profiling_runs": 59370.696, + "compilation_time": 13366.617, + "data": 58871.505, + "framework": 197588.87600000002, + "kernel_overhead": 34328.733, + "profiling_overhead": 48832.301, + "profiling_runs": 55556.337, "runtimes": [ - 1842.464 + 1841.952 ], - "search_algorithm": 37.404, - "validation": 28.575 + "search_algorithm": 18.768, + "validation": 14.379 }, - "timestamp": "2026-03-05 08:58:18 UTC" + "timestamp": "2026-03-13 09:39:45 UTC" }, { "compilation_data": { @@ -5956,14 +5956,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -5971,49 +5971,49 @@ { "name": "time", "unit": "", - "value": 1809.856 + "value": 1837.728 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.10717142505538 + "value": 26.500539961683057 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4632.0 + "value": 4684.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1873172.0 + "value": 1872080.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4378425441578444 + "value": 3.4309140942649607 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 37245.0 + "value": 37341.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102981.0 + "value": 2101235.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.79806750344199 + "value": 5.797206092933397 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -6025,7 +6025,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.271544928885194 + "value": 0.2715322979912289 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -6055,13 +6055,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.31134515014756 + "value": 98.359689151663 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89872967169758 + "value": 99.9109352296694 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -6121,7 +6121,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.63065258675496 + "value": 42.62338692899004 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -6133,13 +6133,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.78129564885306 + "value": 92.76564589046416 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.397752526203115 + "value": 3.3971794149339902 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -6151,7 +6151,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.073450544712045 + "value": 60.06336231713292 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -6170,19 +6170,19 @@ "time" ], "times": { - "compilation_time": 23984.829, - "data": 77434.289, - "framework": 227873.301, - "kernel_overhead": 32289.546, - "profiling_overhead": 64669.631, - "profiling_runs": 53479.835, + "compilation_time": 13577.951, + "data": 58117.824, + "framework": 185445.572, + "kernel_overhead": 29103.463, + "profiling_overhead": 48310.23, + "profiling_runs": 49914.055, "runtimes": [ - 1809.856 + 1837.728 ], - "search_algorithm": 29.336, - "validation": 25.526 + "search_algorithm": 20.836, + "validation": 12.404 }, - "timestamp": "2026-03-05 08:58:18 UTC" + "timestamp": "2026-03-13 09:39:45 UTC" }, { "compilation_data": { @@ -6203,14 +6203,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -6218,49 +6218,49 @@ { "name": "time", "unit": "", - "value": 1759.776 + "value": 1755.424 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.88605679702049 + "value": 28.092722892028878 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4720.0 + "value": 932.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871476.0 + "value": 1872084.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5413931940320693 + "value": 3.556355924168465 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 34133.0 + "value": 29802.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100132.0 + "value": 2100314.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.995810782991468 + "value": 2.9952640837335727 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -6272,7 +6272,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1403372204733366 + "value": 0.1402491568272737 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -6302,13 +6302,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.35487056718864 + "value": 98.3595737905309 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91768621862913 + "value": 99.87680939730984 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -6368,7 +6368,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.53732126041529 + "value": 39.528378494386686 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -6380,13 +6380,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.88246699405228 + "value": 95.86151677434778 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.942930849732993 + "value": 1.9425063213551914 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -6398,7 +6398,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.35940850274993 + "value": 58.3467034570093 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -6417,19 +6417,19 @@ "time" ], "times": { - "compilation_time": 23394.156, - "data": 78146.695, - "framework": 234223.10600000003, - "kernel_overhead": 34944.194, - "profiling_overhead": 64988.476, - "profiling_runs": 56143.741, + "compilation_time": 13604.882, + "data": 59623.043, + "framework": 194224.927, + "kernel_overhead": 32008.704, + "profiling_overhead": 50071.8, + "profiling_runs": 52521.38, "runtimes": [ - 1759.776 + 1755.424 ], - "search_algorithm": 36.021, - "validation": 27.244 + "search_algorithm": 21.328, + "validation": 14.487 }, - "timestamp": "2026-03-05 08:58:18 UTC" + "timestamp": "2026-03-13 09:39:45 UTC" }, { "compilation_data": { @@ -6450,14 +6450,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -6465,49 +6465,49 @@ { "name": "time", "unit": "", - "value": 1766.368 + "value": 1795.904 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.706781480737355 + "value": 27.867701738997784 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3868.0 + "value": 924.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1866968.0 + "value": 1869788.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5345471409390568 + "value": 3.52133640501779 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 32325.0 + "value": 29250.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101961.0 + "value": 2099630.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.494729454191167 + "value": 1.4943379101412644 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -6519,7 +6519,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07002956765974384 + "value": 0.06996971395551867 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -6549,13 +6549,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.71541045816831 + "value": 81.7074426806898 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94107533056885 + "value": 99.8849699867053 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -6615,7 +6615,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.943656862678395 + "value": 37.93239994961857 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -6627,13 +6627,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.67007632098692 + "value": 95.6419997923446 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1561691352267707 + "value": 1.1558298314748676 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -6645,7 +6645,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 53.529472807435695 + "value": 53.51376338360871 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -6664,19 +6664,19 @@ "time" ], "times": { - "compilation_time": 26996.767, - "data": 77405.279, - "framework": 213877.56900000002, - "kernel_overhead": 25416.561, - "profiling_overhead": 64677.698, - "profiling_runs": 46378.031, + "compilation_time": 13422.813, + "data": 59593.235, + "framework": 174816.108, + "kernel_overhead": 22388.917, + "profiling_overhead": 50047.485, + "profiling_runs": 42786.471, "runtimes": [ - 1766.368 + 1795.904 ], - "search_algorithm": 35.299, - "validation": 29.349 + "search_algorithm": 29.27, + "validation": 14.876 }, - "timestamp": "2026-03-05 08:58:19 UTC" + "timestamp": "2026-03-13 09:39:45 UTC" }, { "compilation_data": { @@ -6697,14 +6697,14 @@ "registers": 19 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -6712,49 +6712,49 @@ { "name": "time", "unit": "", - "value": 5089.888 + "value": 5159.36 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.488204138540798 + "value": 9.518882471592383 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8484.0 + "value": 5808.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871756.0 + "value": 1872796.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.2149121270152066 + "value": 1.2159524639114718 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 94975.0 + "value": 96144.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101539.0 + "value": 2107595.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.955330623115381 + "value": 7.954727189749554 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -6766,7 +6766,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3728045907633021 + "value": 0.37276893786334964 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -6796,13 +6796,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.29965499621049 + "value": 97.16883784096825 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96803131613173 + "value": 99.96496214103395 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -6862,7 +6862,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.2670648971134 + "value": 36.26446962192443 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -6874,13 +6874,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.8228317522826 + "value": 31.820765345889647 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.288623810366209 + "value": 4.288345329817159 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -6892,7 +6892,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.80955227851396 + "value": 84.80406107929313 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -6911,19 +6911,19 @@ "time" ], "times": { - "compilation_time": 24956.992, - "data": 78760.572, - "framework": 2094173.5379999997, - "kernel_overhead": 956100.512, - "profiling_overhead": 65373.641, - "profiling_runs": 993938.813, + "compilation_time": 13026.26, + "data": 57987.826, + "framework": 2065597.763, + "kernel_overhead": 961263.118, + "profiling_overhead": 47990.466, + "profiling_runs": 998356.353, "runtimes": [ - 5089.888 + 5159.36 ], - "search_algorithm": 43.43, - "validation": 28.609 + "search_algorithm": 25.943, + "validation": 15.255 }, - "timestamp": "2026-03-05 08:58:20 UTC" + "timestamp": "2026-03-13 09:39:47 UTC" }, { "compilation_data": { @@ -6944,14 +6944,14 @@ "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -6959,49 +6959,49 @@ { "name": "time", "unit": "", - "value": 5636.992 + "value": 5756.032 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.26038379343097 + "value": 8.55675457074028 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10284.0 + "value": 8612.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871400.0 + "value": 1873644.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1065764689527726 + "value": 1.1096452493547357 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 104862.0 + "value": 107082.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100778.0 + "value": 2109418.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.624865238181779 + "value": 3.624640963810151 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -7013,7 +7013,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1698796989612288 + "value": 0.16987352834251276 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -7043,13 +7043,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.66345354418881 + "value": 98.66702736100524 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97334613030138 + "value": 99.97499964747925 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -7109,7 +7109,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.82912344661895 + "value": 33.82803292958803 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -7121,13 +7121,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.00053171998626 + "value": 28.99899868902192 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.010779054803735 + "value": 2.0106727606646055 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -7139,7 +7139,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.01953828842187 + "value": 85.01505725900279 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -7158,19 +7158,19 @@ "time" ], "times": { - "compilation_time": 23946.692, - "data": 78781.421, - "framework": 2674624.573, - "kernel_overhead": 1245289.415, - "profiling_overhead": 64785.487, - "profiling_runs": 1285768.25, + "compilation_time": 13985.204, + "data": 62166.857, + "framework": 2654925.2520000003, + "kernel_overhead": 1250885.075, + "profiling_overhead": 50871.956, + "profiling_runs": 1291001.364, "runtimes": [ - 5636.992 + 5756.032 ], - "search_algorithm": 43.258, - "validation": 27.305 + "search_algorithm": 25.914, + "validation": 17.771 }, - "timestamp": "2026-03-05 08:58:21 UTC" + "timestamp": "2026-03-13 09:39:48 UTC" }, { "compilation_data": { @@ -7191,14 +7191,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -7206,49 +7206,49 @@ { "name": "time", "unit": "", - "value": 7968.48 + "value": 8055.04 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.213741561021145 + "value": 6.260086455331412 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7112.0 + "value": 15192.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1930164.0 + "value": 1931144.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.29505360801224 + "value": 49.266592908175404 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 1189163.0 + "value": 1200427.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138418599.0 + "value": 138417546.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2720713942208848 + "value": 1.2783202307285508 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -7260,7 +7260,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05983444294622985 + "value": 0.059691113062925016 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -7290,13 +7290,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.48216949304346 + "value": 97.51032740096758 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.29803377838853 + "value": 99.83519608176032 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -7356,7 +7356,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.648445960233728 + "value": 10.671041326042738 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -7368,13 +7368,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.362801831296213 + "value": 20.40820010556123 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.026536733841553 + "value": 11.05112007669307 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -7386,7 +7386,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.982748301906142 + "value": 30.049609096220436 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -7405,19 +7405,19 @@ "time" ], "times": { - "compilation_time": 24034.704, - "data": 77247.88, - "framework": 924329.567, - "kernel_overhead": 365573.167, - "profiling_overhead": 64042.0, - "profiling_runs": 417466.52, + "compilation_time": 13830.299, + "data": 59164.738, + "framework": 894372.412, + "kernel_overhead": 367136.288, + "profiling_overhead": 49041.685, + "profiling_runs": 419029.701, "runtimes": [ - 7968.48 + 8055.04 ], - "search_algorithm": 42.494, - "validation": 27.571 + "search_algorithm": 25.595, + "validation": 14.667 }, - "timestamp": "2026-03-05 08:58:22 UTC" + "timestamp": "2026-03-13 09:39:48 UTC" }, { "compilation_data": { @@ -7438,14 +7438,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -7453,49 +7453,49 @@ { "name": "time", "unit": "", - "value": 8051.872 + "value": 8502.048 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.513612388747786 + "value": 6.3602487868904625 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 20452.0 + "value": 22976.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2021856.0 + "value": 2023864.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.428578080313386 + "value": 54.56646998103 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 18354782.0 + "value": 18157608.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138423365.0 + "value": 138422573.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6297278110693573 + "value": 0.6310957584696835 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -7507,7 +7507,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029395657997212197 + "value": 0.029443231087911578 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -7537,13 +7537,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.93189658289434 + "value": 89.90475925765347 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.22107128035603 + "value": 99.93561734222605 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -7603,7 +7603,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.140009271759984 + "value": 9.088813533163375 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -7615,13 +7615,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.2249739903818 + "value": 20.11286161754474 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.621073987624818 + "value": 10.562198569174496 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -7633,7 +7633,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.654120406567255 + "value": 27.500840848582143 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -7652,19 +7652,19 @@ "time" ], "times": { - "compilation_time": 24151.263, - "data": 77657.267, - "framework": 896889.344, - "kernel_overhead": 351232.27, - "profiling_overhead": 64127.984, - "profiling_runs": 403871.823, + "compilation_time": 13984.769, + "data": 60539.798, + "framework": 867998.5009999999, + "kernel_overhead": 352153.604, + "profiling_overhead": 50124.313, + "profiling_runs": 405180.786, "runtimes": [ - 8051.872 + 8502.048 ], - "search_algorithm": 45.961, - "validation": 29.653 + "search_algorithm": 25.173, + "validation": 15.782 }, - "timestamp": "2026-03-05 08:58:22 UTC" + "timestamp": "2026-03-13 09:39:49 UTC" }, { "compilation_data": { @@ -7685,14 +7685,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -7700,49 +7700,49 @@ { "name": "time", "unit": "", - "value": 9519.712 + "value": 9767.36 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.108039600926485 + "value": 6.242474160206719 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8412.0 + "value": 23204.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2294676.0 + "value": 2296000.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 77.22209660510792 + "value": 77.27225884050151 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 121510579.0 + "value": 121369623.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415654.0 + "value": 138425733.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.2719596153914386 + "value": 0.2718645291534466 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -7754,7 +7754,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01256507703717447 + "value": 0.012555228753141336 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -7784,13 +7784,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.10563812804043 + "value": 95.26132949647634 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.65757846700416 + "value": 98.42321466598666 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -7850,7 +7850,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.238217831223862 + "value": 7.250918390399337 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -7862,13 +7862,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.388951544653178 + "value": 17.416696236211198 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.98952756245193 + "value": 9.00387067387139 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -7880,7 +7880,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.862536893725075 + "value": 22.899027927000642 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -7899,19 +7899,19 @@ "time" ], "times": { - "compilation_time": 23936.45, - "data": 78778.432, - "framework": 885390.257, - "kernel_overhead": 340305.34, - "profiling_overhead": 65606.78, - "profiling_runs": 400699.705, + "compilation_time": 14240.65, + "data": 59715.488, + "framework": 853463.687, + "kernel_overhead": 341872.934, + "profiling_overhead": 49166.208, + "profiling_runs": 402709.057, "runtimes": [ - 9519.712 + 9767.36 ], - "search_algorithm": 42.989, - "validation": 29.298 + "search_algorithm": 26.958, + "validation": 16.798 }, - "timestamp": "2026-03-05 08:58:23 UTC" + "timestamp": "2026-03-13 09:39:49 UTC" }, { "compilation_data": { @@ -7932,14 +7932,14 @@ "registers": 23 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -7947,49 +7947,49 @@ { "name": "time", "unit": "", - "value": 3413.152 + "value": 3676.416 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.75272059200082 + "value": 13.975715828769589 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4804.0 + "value": 12896.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870060.0 + "value": 1875136.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.814011269405932 + "value": 1.8259583311094798 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 62776.0 + "value": 72316.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100454.0 + "value": 2105783.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.060256575324432 + "value": 6.059545600969919 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -8001,7 +8001,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.28394328782780903 + "value": 0.2839379241637574 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -8031,13 +8031,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.911285400053 + "value": 98.14280173270603 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94751144233335 + "value": 99.95327815379808 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -8097,7 +8097,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.38183186501486 + "value": 41.379535780047014 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -8109,13 +8109,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.48510358750233 + "value": 48.481390457967585 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3617601120240876 + "value": 3.3615026587067365 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -8127,7 +8127,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.59582903647059 + "value": 87.58914272849063 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -8146,19 +8146,19 @@ "time" ], "times": { - "compilation_time": 23942.146, - "data": 77949.289, - "framework": 1374214.727, - "kernel_overhead": 601008.635, - "profiling_overhead": 64979.452, - "profiling_runs": 630277.351, + "compilation_time": 13640.987, + "data": 58259.395, + "framework": 1342713.919, + "kernel_overhead": 603484.534, + "profiling_overhead": 48165.951, + "profiling_runs": 632804.039, "runtimes": [ - 3413.152 + 3676.416 ], - "search_algorithm": 45.135, - "validation": 30.304 + "search_algorithm": 26.265, + "validation": 15.467 }, - "timestamp": "2026-03-05 08:58:24 UTC" + "timestamp": "2026-03-13 09:39:50 UTC" }, { "compilation_data": { @@ -8179,14 +8179,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -8194,49 +8194,49 @@ { "name": "time", "unit": "", - "value": 4018.24 + "value": 4210.272 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.786233077105779 + "value": 11.862141514912011 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1432.0 + "value": 10140.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870204.0 + "value": 1872528.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.522631298547231 + "value": 1.5191505520101931 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 68994.0 + "value": 81002.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098838.0 + "value": 2101653.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.539960480740308 + "value": 2.5397727346698145 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -8248,7 +8248,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11902826902016136 + "value": 0.11900704081409803 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -8278,13 +8278,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.8134396279408 + "value": 98.8669592396122 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9638594993722 + "value": 99.95594300731703 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -8344,7 +8344,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.97162586373449 + "value": 40.966309128121544 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -8356,13 +8356,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.64300441081266 + "value": 40.638974241119975 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4883912748100339 + "value": 1.4882436855878898 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -8374,7 +8374,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.27643673242108 + "value": 90.26750473247913 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -8393,19 +8393,19 @@ "time" ], "times": { - "compilation_time": 23519.892, - "data": 77752.174, - "framework": 1951525.855, - "kernel_overhead": 888404.147, - "profiling_overhead": 64583.68, - "profiling_runs": 920785.854, + "compilation_time": 13230.829, + "data": 60605.908, + "framework": 1926204.6260000002, + "kernel_overhead": 891593.946, + "profiling_overhead": 50489.035, + "profiling_runs": 923515.737, "runtimes": [ - 4018.24 + 4210.272 ], - "search_algorithm": 42.947, - "validation": 28.688 + "search_algorithm": 27.301, + "validation": 18.108 }, - "timestamp": "2026-03-05 08:58:25 UTC" + "timestamp": "2026-03-13 09:39:51 UTC" }, { "compilation_data": { @@ -8426,14 +8426,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -8441,49 +8441,49 @@ { "name": "time", "unit": "", - "value": 8105.695 + "value": 8303.808 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.49620818685416 + "value": 6.40172827953853 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 19088.0 + "value": 4868.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2022612.0 + "value": 2017988.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 57.13862558415988 + "value": 57.23745756213624 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 25549471.0 + "value": 25652550.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138428000.0 + "value": 138423121.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.632692679521131 + "value": 0.6312728550604355 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -8495,7 +8495,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02949538268122726 + "value": 0.02945935043570734 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -8525,13 +8525,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.63900562744628 + "value": 91.7025527701918 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.49778428179343 + "value": 99.48985471143669 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -8591,7 +8591,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.987357173890159 + "value": 8.976818138966234 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -8603,13 +8603,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.237148718833968 + "value": 20.2140375241675 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.568180323761201 + "value": 5.561821359799017 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -8621,7 +8621,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.081828008711465 + "value": 20.05891015741156 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -8640,19 +8640,19 @@ "time" ], "times": { - "compilation_time": 23246.675, - "data": 79161.263, - "framework": 583115.382, - "kernel_overhead": 193264.133, - "profiling_overhead": 64954.777, - "profiling_runs": 245735.209, + "compilation_time": 13511.5, + "data": 60082.512, + "framework": 548995.833, + "kernel_overhead": 193367.178, + "profiling_overhead": 49544.384, + "profiling_runs": 246001.759, "runtimes": [ - 8105.695 + 8303.808 ], - "search_algorithm": 44.439, - "validation": 32.032 + "search_algorithm": 26.803, + "validation": 20.728 }, - "timestamp": "2026-03-05 08:58:25 UTC" + "timestamp": "2026-03-13 09:39:52 UTC" }, { "compilation_data": { @@ -8673,14 +8673,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -8688,49 +8688,49 @@ { "name": "time", "unit": "", - "value": 9374.08 + "value": 9562.176 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.313393460609035 + "value": 6.225000697452207 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 22144.0 + "value": 6092.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2276492.0 + "value": 2278796.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.74701818545796 + "value": 81.73561573473982 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 132980725.0 + "value": 133187151.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425674.0 + "value": 138415966.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.27354139990324094 + "value": 0.2739006620849816 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -8742,7 +8742,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012747134711134554 + "value": 0.012789094100006218 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -8772,13 +8772,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.78240329563454 + "value": 98.78420895283412 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.0547981587486 + "value": 99.56574674175764 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -8838,7 +8838,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.247046953817164 + "value": 7.233762909093501 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -8850,13 +8850,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.57016141480597 + "value": 17.537533789772603 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.690666871848224 + "value": 4.681956347440512 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -8868,7 +8868,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.512002458387595 + "value": 16.481353021762146 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -8887,19 +8887,19 @@ "time" ], "times": { - "compilation_time": 23564.857, - "data": 79101.421, - "framework": 567633.031, - "kernel_overhead": 181854.794, - "profiling_overhead": 65432.133, - "profiling_runs": 241244.683, + "compilation_time": 13073.774, + "data": 60135.174, + "framework": 529899.45, + "kernel_overhead": 180467.546, + "profiling_overhead": 49902.893, + "profiling_runs": 239393.837, "runtimes": [ - 9374.08 + 9562.176 ], - "search_algorithm": 43.86, - "validation": 26.859 + "search_algorithm": 23.886, + "validation": 15.483 }, - "timestamp": "2026-03-05 08:58:25 UTC" + "timestamp": "2026-03-13 09:39:52 UTC" }, { "compilation_data": { @@ -8920,14 +8920,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -8935,49 +8935,49 @@ { "name": "time", "unit": "", - "value": 2567.712 + "value": 2606.656 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 25.207907242540905 + "value": 18.646266711484312 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4852.0 + "value": 9032.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872520.0 + "value": 1872612.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.419401641750456 + "value": 2.429362155272285 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 48727.0 + "value": 53484.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100432.0 + "value": 2105068.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.067263591147413 + "value": 4.066855499621977 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -8989,7 +8989,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.19058770047052112 + "value": 0.19055846983781827 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -9019,13 +9019,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.50990289124503 + "value": 98.60720811885393 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95062588354064 + "value": 99.94983820247597 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -9085,7 +9085,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.25549293826121 + "value": 45.248621371022594 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -9097,13 +9097,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.08607076631687 + "value": 65.07660129762041 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.383523099352424 + "value": 2.3831763170515283 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -9115,7 +9115,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.70290769523841 + "value": 91.68959600722548 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -9134,19 +9134,19 @@ "time" ], "times": { - "compilation_time": 23574.048, - "data": 77517.917, - "framework": 1103053.559, - "kernel_overhead": 468252.871, - "profiling_overhead": 64095.719, - "profiling_runs": 493187.052, + "compilation_time": 13671.837, + "data": 60386.798, + "framework": 1068408.703, + "kernel_overhead": 466696.578, + "profiling_overhead": 49962.565, + "profiling_runs": 491362.762, "runtimes": [ - 2567.712 + 2606.656 ], - "search_algorithm": 44.915, - "validation": 27.987 + "search_algorithm": 83.174, + "validation": 15.4 }, - "timestamp": "2026-03-05 08:58:26 UTC" + "timestamp": "2026-03-13 09:39:52 UTC" }, { "compilation_data": { @@ -9167,14 +9167,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -9182,49 +9182,49 @@ { "name": "time", "unit": "", - "value": 3243.936 + "value": 3403.872 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.76997696907661 + "value": 14.967982685179496 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7396.0 + "value": 524.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871492.0 + "value": 1870584.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9056478579268425 + "value": 1.9047070231718157 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 59948.0 + "value": 54076.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100194.0 + "value": 2099384.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5935895927919896 + "value": 1.5933653701416552 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -9236,7 +9236,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0746768167499711 + "value": 0.07466498281575212 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -9266,13 +9266,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.60599824542064 + "value": 98.59393961313472 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96883889066275 + "value": 99.95883320378765 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -9332,7 +9332,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.37399839717826 + "value": 45.37124393274073 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -9344,13 +9344,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.995264258032535 + "value": 50.99228682635527 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0333513021036866 + "value": 1.0332909684051483 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -9362,7 +9362,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.16866195031263 + "value": 95.16313122613202 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -9381,19 +9381,19 @@ "time" ], "times": { - "compilation_time": 24698.927, - "data": 77318.331, - "framework": 1845069.205, - "kernel_overhead": 837620.462, - "profiling_overhead": 64158.042, - "profiling_runs": 865972.37, + "compilation_time": 13395.907, + "data": 58106.009, + "framework": 1816746.463, + "kernel_overhead": 841553.396, + "profiling_overhead": 47703.216, + "profiling_runs": 869383.842, "runtimes": [ - 3243.936 + 3403.872 ], - "search_algorithm": 45.122, - "validation": 33.435 + "search_algorithm": 25.326, + "validation": 13.25 }, - "timestamp": "2026-03-05 08:58:27 UTC" + "timestamp": "2026-03-13 09:39:53 UTC" }, { "compilation_data": { @@ -9414,14 +9414,14 @@ "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -9429,49 +9429,49 @@ { "name": "time", "unit": "", - "value": 5376.352 + "value": 5476.512 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.751102164837112 + "value": 8.896491312566978 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 560.0 + "value": 19980.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1865688.0 + "value": 1871508.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1553378586569238 + "value": 1.1669972281057701 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 89059.0 + "value": 111029.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099231.0 + "value": 2103679.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.47674492032141996 + "value": 0.4767256536101042 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -9483,7 +9483,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02234357479304373 + "value": 0.022342026165770853 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -9513,13 +9513,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.8668059802131 + "value": 73.86720951220232 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97879901503529 + "value": 99.97500601732874 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -9579,7 +9579,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.99254821397175 + "value": 42.991224419278794 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -9591,13 +9591,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.5128964853652 + "value": 30.511939207130567 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3687471621156195 + "value": 0.36873559344554757 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -9609,7 +9609,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.94559621918853 + "value": 96.94255475410675 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -9628,19 +9628,19 @@ "time" ], "times": { - "compilation_time": 23545.831, - "data": 77761.116, - "framework": 3064901.3959999997, - "kernel_overhead": 1441717.68, - "profiling_overhead": 64615.698, - "profiling_runs": 1480806.902, + "compilation_time": 13382.007, + "data": 59647.102, + "framework": 3040552.2479999997, + "kernel_overhead": 1446340.699, + "profiling_overhead": 49477.868, + "profiling_runs": 1485086.579, "runtimes": [ - 5376.352 + 5476.512 ], - "search_algorithm": 43.662, - "validation": 31.737 + "search_algorithm": 24.619, + "validation": 18.279 }, - "timestamp": "2026-03-05 08:58:29 UTC" + "timestamp": "2026-03-13 09:39:55 UTC" }, { "compilation_data": { @@ -9661,14 +9661,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -9676,49 +9676,49 @@ { "name": "time", "unit": "", - "value": 3239.712 + "value": 3259.232 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.676899672597482 + "value": 15.034516992981159 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6868.0 + "value": 4204.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870176.0 + "value": 1871180.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.900316093497477 + "value": 1.8994181248726427 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 59496.0 + "value": 57103.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102625.0 + "value": 2102177.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5876349387156388 + "value": 1.587468515881214 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -9730,7 +9730,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07439643260387332 + "value": 0.07439025778209998 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -9760,13 +9760,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.68730616615201 + "value": 98.68602367249909 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96314310033935 + "value": 99.96579663018427 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -9826,7 +9826,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.031044473247256 + "value": 42.026194324026925 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -9838,13 +9838,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.80669042848965 + "value": 50.801125009237715 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0295301038976175 + "value": 1.0294173280680492 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -9856,7 +9856,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.25976211512703 + "value": 89.25000702586684 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -9875,19 +9875,19 @@ "time" ], "times": { - "compilation_time": 23438.894, - "data": 78474.582, - "framework": 1673852.11, - "kernel_overhead": 750851.56, - "profiling_overhead": 65227.097, - "profiling_runs": 779298.871, + "compilation_time": 13403.242, + "data": 59489.271, + "framework": 1645560.0869999998, + "kernel_overhead": 754426.085, + "profiling_overhead": 49392.814, + "profiling_runs": 782251.917, "runtimes": [ - 3239.712 + 3259.232 ], - "search_algorithm": 41.534, - "validation": 27.87 + "search_algorithm": 24.036, + "validation": 16.376 }, - "timestamp": "2026-03-05 08:58:29 UTC" + "timestamp": "2026-03-13 09:39:56 UTC" }, { "compilation_data": { @@ -9908,14 +9908,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -9923,49 +9923,49 @@ { "name": "time", "unit": "", - "value": 6196.224 + "value": 6226.656 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.408186410879203 + "value": 7.887015447469977 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 14536.0 + "value": 5816.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872300.0 + "value": 1867200.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0350176111743024 + "value": 1.0270978028254016 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 116833.0 + "value": 105277.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109434.0 + "value": 2101160.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.41934956282347363 + "value": 0.41932906661507807 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -9977,7 +9977,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01965395350598828 + "value": 0.019651875847355206 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -10007,13 +10007,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.97095322328906 + "value": 81.97553947736539 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97934039681408 + "value": 99.97570943063904 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -10073,7 +10073,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.84780345181573 + "value": 42.84512996444503 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -10085,13 +10085,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.839742837875757 + "value": 26.83788023083479 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.32435724376827396 + "value": 0.3243347342349419 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -10103,7 +10103,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.65781358633983 + "value": 93.6513139822236 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -10122,19 +10122,19 @@ "time" ], "times": { - "compilation_time": 24051.955, - "data": 77901.988, - "framework": 3544146.81, - "kernel_overhead": 1679457.55, - "profiling_overhead": 64646.994, - "profiling_runs": 1722140.278, + "compilation_time": 13911.353, + "data": 59725.602, + "framework": 3522754.5779999997, + "kernel_overhead": 1686152.16, + "profiling_overhead": 48671.242, + "profiling_runs": 1728205.574, "runtimes": [ - 6196.224 + 6226.656 ], - "search_algorithm": 46.478, - "validation": 29.403 + "search_algorithm": 24.132, + "validation": 13.743 }, - "timestamp": "2026-03-05 08:58:31 UTC" + "timestamp": "2026-03-13 09:39:58 UTC" }, { "compilation_data": { @@ -10155,14 +10155,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 16, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -10170,49 +10170,49 @@ { "name": "time", "unit": "", - "value": 5797.92 + "value": 5852.544 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.901583137056141 + "value": 8.389816318969448 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2592.0 + "value": 11308.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869536.0 + "value": 1872736.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0732266868130578 + "value": 1.080135320686978 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 98147.0 + "value": 107593.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099935.0 + "value": 2103178.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.440909611544948 + "value": 0.4408730712638775 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -10224,7 +10224,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02066316031938708 + "value": 0.020661025014165783 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -10254,13 +10254,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.98268067041548 + "value": 81.98062186061263 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97251301932123 + "value": 99.96918532458558 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -10320,7 +10320,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.94888545694018 + "value": 43.94575174575264 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -10332,13 +10332,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.219858343080162 + "value": 28.217881401236966 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3410358857379072 + "value": 0.3410119944729565 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -10350,7 +10350,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.50533300013349 + "value": 94.49871148406723 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -10369,19 +10369,19 @@ "time" ], "times": { - "compilation_time": 23880.298, - "data": 77182.508, - "framework": 3450298.107, - "kernel_overhead": 1633873.925, - "profiling_overhead": 64196.645, - "profiling_runs": 1675045.029, + "compilation_time": 13853.837, + "data": 59305.4, + "framework": 3429615.082, + "kernel_overhead": 1640254.03, + "profiling_overhead": 49187.747, + "profiling_runs": 1680867.905, "runtimes": [ - 5797.92 + 5852.544 ], - "search_algorithm": 45.877, - "validation": 31.952 + "search_algorithm": 25.979, + "validation": 16.924 }, - "timestamp": "2026-03-05 08:58:33 UTC" + "timestamp": "2026-03-13 09:39:59 UTC" }, { "compilation_data": { @@ -10402,14 +10402,14 @@ "registers": 44 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 1 }, "correctness": 1, "invalidity": "correct", @@ -10417,49 +10417,49 @@ { "name": "time", "unit": "", - "value": 3645.664 + "value": 3686.048 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.137741937406144 + "value": 13.076858662364652 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 428.0 + "value": 416.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840160.0 + "value": 1836052.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.686708818867508 + "value": 1.6878185285454446 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 61519.0 + "value": 61090.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099167.0 + "value": 2099321.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.560828566791557 + "value": 22.559762380196002 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -10471,7 +10471,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0572905552840435 + "value": 1.057200671268224 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -10501,13 +10501,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.41902113511516 + "value": 60.366339899006526 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9533051246498 + "value": 99.95431328657357 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -10567,7 +10567,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.10783949093691 + "value": 40.1037043487646 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -10579,13 +10579,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.13213808774214 + "value": 45.12784607048731 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.988224179556505 + "value": 11.987084112473193 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -10597,7 +10597,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.3854259315578 + "value": 57.38004328271467 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -10616,19 +10616,19 @@ "time" ], "times": { - "compilation_time": 23952.175, - "data": 77899.058, - "framework": 285335.06, - "kernel_overhead": 56155.138, - "profiling_overhead": 64781.319, - "profiling_runs": 86499.545, + "compilation_time": 14887.282, + "data": 57246.511, + "framework": 244937.234, + "kernel_overhead": 55117.348, + "profiling_overhead": 47394.438, + "profiling_runs": 85178.937, "runtimes": [ - 3645.664 + 3686.048 ], - "search_algorithm": 46.146, - "validation": 27.811 + "search_algorithm": 24.012, + "validation": 17.475 }, - "timestamp": "2026-03-05 08:58:33 UTC" + "timestamp": "2026-03-13 09:40:0 UTC" }, { "compilation_data": { @@ -10649,14 +10649,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -10664,49 +10664,49 @@ { "name": "time", "unit": "", - "value": 2123.584 + "value": 2122.016 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.989669958419956 + "value": 22.956888350601176 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7568.0 + "value": 504.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838836.0 + "value": 1837340.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.9603830050088193 + "value": 2.926737917787674 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 43062.0 + "value": 36061.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100646.0 + "value": 2099302.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.910667443335285 + "value": 19.908599853805335 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -10718,7 +10718,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9329922434769602 + "value": 0.9327028657671272 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -10748,13 +10748,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.67205231795086 + "value": 95.60071689181278 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94468710535702 + "value": 99.92998341934313 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -10814,7 +10814,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.487746999362045 + "value": 50.48047723925701 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -10826,13 +10826,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.65940003004579 + "value": 79.64641023852649 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.73534883217414 + "value": 10.733598254801421 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -10844,7 +10844,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.23737559343756 + "value": 76.22509632686369 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -10863,19 +10863,19 @@ "time" ], "times": { - "compilation_time": 23659.473, - "data": 77516.922, - "framework": 291948.339, - "kernel_overhead": 63836.474, - "profiling_overhead": 64010.084, - "profiling_runs": 86584.859, + "compilation_time": 16149.166, + "data": 57099.446, + "framework": 249865.95799999998, + "kernel_overhead": 61613.246, + "profiling_overhead": 47376.441, + "profiling_runs": 83776.825, "runtimes": [ - 2123.584 + 2122.016 ], - "search_algorithm": 40.166, - "validation": 30.321 + "search_algorithm": 24.054, + "validation": 13.777 }, - "timestamp": "2026-03-05 08:58:33 UTC" + "timestamp": "2026-03-13 09:40:0 UTC" }, { "compilation_data": { @@ -10896,14 +10896,14 @@ "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -10911,49 +10911,49 @@ { "name": "time", "unit": "", - "value": 1835.104 + "value": 1943.936 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 34.25497972328244 + "value": 24.6764072224882 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 448.0 + "value": 7548.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837592.0 + "value": 1839716.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3302672811728042 + "value": 3.316109558208976 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 32134.0 + "value": 40549.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099167.0 + "value": 2101052.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.293169227708892 + "value": 11.291482658317713 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -10965,7 +10965,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5290606063509016 + "value": 0.5289068945454217 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -10995,13 +10995,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.10898177595263 + "value": 96.69094759596389 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92591670147849 + "value": 99.908493213104 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -11061,7 +11061,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.842067634155015 + "value": 45.83640720148217 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -11073,13 +11073,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.35995178337744 + "value": 90.34945255009201 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.265191969355273 + "value": 6.264463995172395 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -11091,7 +11091,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.4475216726092 + "value": 66.43997761016234 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -11110,19 +11110,19 @@ "time" ], "times": { - "compilation_time": 23485.077, - "data": 78835.333, - "framework": 241120.76499999998, - "kernel_overhead": 38186.301, - "profiling_overhead": 64516.624, - "profiling_runs": 59582.507, + "compilation_time": 16585.475, + "data": 58463.755, + "framework": 200088.5, + "kernel_overhead": 36315.763, + "profiling_overhead": 47797.419, + "profiling_runs": 57511.563, "runtimes": [ - 1835.104 + 1943.936 ], - "search_algorithm": 40.834, - "validation": 32.341 + "search_algorithm": 34.075, + "validation": 17.789 }, - "timestamp": "2026-03-05 08:58:34 UTC" + "timestamp": "2026-03-13 09:40:0 UTC" }, { "compilation_data": { @@ -11143,14 +11143,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -11158,49 +11158,49 @@ { "name": "time", "unit": "", - "value": 1808.864 + "value": 1859.104 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.03152091856614 + "value": 26.431462741490343 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 420.0 + "value": 1280.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840904.0 + "value": 1837504.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4227764602235693 + "value": 3.411852801258955 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 31472.0 + "value": 31743.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099158.0 + "value": 2099261.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.798534979269805 + "value": 5.7972407078998724 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -11212,7 +11212,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.27161857749053875 + "value": 0.2715471491637572 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -11242,13 +11242,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.58522366411808 + "value": 98.58441865574966 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91880993463515 + "value": 99.91467054025021 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -11308,7 +11308,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.634966048260914 + "value": 42.62301908119608 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -11320,13 +11320,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.78780893282708 + "value": 92.76725137565269 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.397991049786148 + "value": 3.3972382095575937 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -11338,7 +11338,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.07747484577879 + "value": 60.06433547506145 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -11357,19 +11357,19 @@ "time" ], "times": { - "compilation_time": 24919.899, - "data": 77687.52, - "framework": 227850.243, - "kernel_overhead": 32281.823, - "profiling_overhead": 64349.927, - "profiling_runs": 53530.973, + "compilation_time": 15880.836, + "data": 56781.868, + "framework": 184345.528, + "kernel_overhead": 29855.577, + "profiling_overhead": 47025.448, + "profiling_runs": 50682.635, "runtimes": [ - 1808.864 + 1859.104 ], - "search_algorithm": 33.736, - "validation": 26.667 + "search_algorithm": 23.846, + "validation": 16.564 }, - "timestamp": "2026-03-05 08:58:34 UTC" + "timestamp": "2026-03-13 09:40:0 UTC" }, { "compilation_data": { @@ -11390,14 +11390,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -11405,49 +11405,49 @@ { "name": "time", "unit": "", - "value": 1750.592 + "value": 1760.928 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.35929272985014 + "value": 27.35134240877293 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 816.0 + "value": 5316.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837696.0 + "value": 1846348.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5330971638978657 + "value": 3.4870137636192142 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 30899.0 + "value": 36022.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099900.0 + "value": 2100270.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.995923296779623 + "value": 2.9955779503732844 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -11459,7 +11459,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.14035350625628495 + "value": 0.14032899887113268 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -11489,13 +11489,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.42183782524971 + "value": 98.43439783629341 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92331413600127 + "value": 99.92471505664608 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -11555,7 +11555,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.53892258511228 + "value": 39.53339082589415 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -11567,13 +11567,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.88819296020151 + "value": 95.87010565075911 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9430468788322082 + "value": 1.942680363528566 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -11585,7 +11585,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.36269039133033 + "value": 58.35187112942539 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -11604,19 +11604,19 @@ "time" ], "times": { - "compilation_time": 23906.91, - "data": 77753.871, - "framework": 234479.684, - "kernel_overhead": 35611.368, - "profiling_overhead": 64469.598, - "profiling_runs": 56644.847, + "compilation_time": 16843.16, + "data": 57912.55, + "framework": 193259.279, + "kernel_overhead": 33160.212, + "profiling_overhead": 48339.952, + "profiling_runs": 53846.565, "runtimes": [ - 1750.592 + 1760.928 ], - "search_algorithm": 44.024, - "validation": 26.473 + "search_algorithm": 29.214, + "validation": 16.99 }, - "timestamp": "2026-03-05 08:58:34 UTC" + "timestamp": "2026-03-13 09:40:0 UTC" }, { "compilation_data": { @@ -11637,14 +11637,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -11652,49 +11652,49 @@ { "name": "time", "unit": "", - "value": 1757.568 + "value": 1750.016 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.99321109325566 + "value": 27.67334691591673 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 828.0 + "value": 456.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1834008.0 + "value": 1833760.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5294395110140293 + "value": 3.525001794193909 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 30642.0 + "value": 30348.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099902.0 + "value": 2099076.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4945712653788041 + "value": 1.4942277541359679 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -11706,7 +11706,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07001765256247058 + "value": 0.06999991910816639 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -11736,13 +11736,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.73421665286492 + "value": 81.74092345350775 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9357975647669 + "value": 99.92786705723371 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -11802,7 +11802,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.93871166564994 + "value": 37.93282632720237 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -11814,13 +11814,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.65885027668679 + "value": 95.64221243137006 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.156033468919921 + "value": 1.155832401209184 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -11832,7 +11832,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 53.52304406153514 + "value": 53.51388235973342 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -11851,19 +11851,19 @@ "time" ], "times": { - "compilation_time": 24316.543, - "data": 77295.961, - "framework": 213129.02, - "kernel_overhead": 25178.134, - "profiling_overhead": 64323.99, - "profiling_runs": 46330.935, + "compilation_time": 17923.512, + "data": 58516.471, + "framework": 175269.54299999998, + "kernel_overhead": 23853.835, + "profiling_overhead": 48588.41, + "profiling_runs": 44310.827, "runtimes": [ - 1757.568 + 1750.016 ], - "search_algorithm": 37.353, - "validation": 29.665 + "search_algorithm": 34.279, + "validation": 16.945 }, - "timestamp": "2026-03-05 08:58:34 UTC" + "timestamp": "2026-03-13 09:40:0 UTC" }, { "compilation_data": { @@ -11884,14 +11884,14 @@ "registers": 19 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -11899,49 +11899,49 @@ { "name": "time", "unit": "", - "value": 5214.08 + "value": 5159.68 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.07312538645365 + "value": 9.262054929325462 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10592.0 + "value": 14396.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838864.0 + "value": 1840884.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.2102880658254853 + "value": 1.2089685352549122 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 95937.0 + "value": 103384.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100774.0 + "value": 2102367.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.955541533312376 + "value": 7.954741635171987 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -11953,7 +11953,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3727806988910017 + "value": 0.3727681886884133 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -11983,13 +11983,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.6730509041806 + "value": 97.81618024698192 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96091792317344 + "value": 99.96402144141355 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -12049,7 +12049,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.26651754736503 + "value": 36.26494213110673 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -12061,13 +12061,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.823056750186478 + "value": 31.821000838840863 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.2886541323493494 + "value": 4.288377066171913 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -12079,7 +12079,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.81008492916699 + "value": 84.80466710601902 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -12098,19 +12098,19 @@ "time" ], "times": { - "compilation_time": 23441.443, - "data": 77999.087, - "framework": 2120353.878, - "kernel_overhead": 969764.392, - "profiling_overhead": 64696.601, - "profiling_runs": 1007893.798, + "compilation_time": 17160.528, + "data": 58178.988, + "framework": 2091507.7329999998, + "kernel_overhead": 973699.193, + "profiling_overhead": 48076.224, + "profiling_runs": 1011553.328, "runtimes": [ - 5214.08 + 5159.68 ], - "search_algorithm": 43.021, - "validation": 34.157 + "search_algorithm": 24.491, + "validation": 17.378 }, - "timestamp": "2026-03-05 08:58:35 UTC" + "timestamp": "2026-03-13 09:40:1 UTC" }, { "compilation_data": { @@ -12131,14 +12131,14 @@ "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -12146,49 +12146,49 @@ { "name": "time", "unit": "", - "value": 5675.264 + "value": 5877.28 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.03346418996553 + "value": 8.355767902570419 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6548.0 + "value": 16764.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837188.0 + "value": 1841516.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.103670829000328 + "value": 1.1036310142077388 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 99597.0 + "value": 112057.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100330.0 + "value": 2106333.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.6248589883838256 + "value": 3.624655450980168 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -12200,7 +12200,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.16988083214925623 + "value": 0.1698626501227523 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -12230,13 +12230,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.79257061229609 + "value": 98.82166269113387 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97341421313484 + "value": 99.96868014833953 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -12296,7 +12296,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.82993787490858 + "value": 33.827445703972344 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -12308,13 +12308,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.000705419205207 + "value": 28.998974723482846 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.0107910984019233 + "value": 2.0106710989914864 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -12326,7 +12326,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.01998463753347 + "value": 85.01496766301393 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -12345,19 +12345,19 @@ "time" ], "times": { - "compilation_time": 24813.785, - "data": 77242.395, - "framework": 2709245.0590000004, - "kernel_overhead": 1263717.148, - "profiling_overhead": 64103.557, - "profiling_runs": 1304181.959, + "compilation_time": 15193.866, + "data": 58155.642, + "framework": 2676112.106, + "kernel_overhead": 1264764.749, + "profiling_overhead": 48054.662, + "profiling_runs": 1305137.053, "runtimes": [ - 5675.264 + 5877.28 ], - "search_algorithm": 47.307, - "validation": 26.366 + "search_algorithm": 25.863, + "validation": 18.713 }, - "timestamp": "2026-03-05 08:58:37 UTC" + "timestamp": "2026-03-13 09:40:3 UTC" }, { "compilation_data": { @@ -12378,14 +12378,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -12393,49 +12393,49 @@ { "name": "time", "unit": "", - "value": 8113.408 + "value": 8202.783 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.005782068027793 + "value": 6.107804881036976 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3584.0 + "value": 5400.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1910692.0 + "value": 1908960.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.80624262034699 + "value": 48.82341327257352 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 557324.0 + "value": 599763.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138420853.0 + "value": 138423999.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2615769607938871 + "value": 1.2771077920393916 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -12447,7 +12447,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.059424016471594565 + "value": 0.05965367806699363 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -12477,13 +12477,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.09322937594767 + "value": 91.20397001426353 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.05288590272083 + "value": 99.72918536962962 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -12543,7 +12543,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.602771476571903 + "value": 10.675083834799617 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -12555,13 +12555,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.272676234472005 + "value": 20.417081223247653 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.977733371108132 + "value": 11.055929236612133 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -12573,7 +12573,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.849980026461516 + "value": 30.062666280875366 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -12592,19 +12592,19 @@ "time" ], "times": { - "compilation_time": 24541.867, - "data": 78103.696, - "framework": 940651.856, - "kernel_overhead": 372049.397, - "profiling_overhead": 65245.973, - "profiling_runs": 425252.79, + "compilation_time": 14496.293, + "data": 58159.956, + "framework": 906075.623, + "kernel_overhead": 373569.689, + "profiling_overhead": 48008.939, + "profiling_runs": 426337.039, "runtimes": [ - 8113.408 + 8202.783 ], - "search_algorithm": 30.921, - "validation": 29.267 + "search_algorithm": 26.437, + "validation": 15.447 }, - "timestamp": "2026-03-05 08:58:37 UTC" + "timestamp": "2026-03-13 09:40:3 UTC" }, { "compilation_data": { @@ -12625,14 +12625,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -12640,49 +12640,49 @@ { "name": "time", "unit": "", - "value": 8132.608 + "value": 8950.4 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.38922658769437 + "value": 6.339470860011641 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 14632.0 + "value": 5892.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2003228.0 + "value": 2001576.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 53.679918617497535 + "value": 53.56458169832884 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 15043916.0 + "value": 15109514.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138424825.0 + "value": 138419528.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6331998246094167 + "value": 0.6372941119808453 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -12694,7 +12694,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02959697461498221 + "value": 0.02982395058854853 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -12724,13 +12724,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.89016713988607 + "value": 88.86789069958499 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.60706240705633 + "value": 99.20939704129323 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -12790,7 +12790,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.166390636934457 + "value": 9.274760323601196 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -12802,13 +12802,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.2845737195403 + "value": 20.522064988099437 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.652372575862106 + "value": 10.777090280615695 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -12820,7 +12820,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.735547038133845 + "value": 28.060334812913588 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -12839,19 +12839,19 @@ "time" ], "times": { - "compilation_time": 23323.227, - "data": 80089.145, - "framework": 914847.747, - "kernel_overhead": 357593.026, - "profiling_overhead": 66368.003, - "profiling_runs": 410797.573, + "compilation_time": 14709.784, + "data": 58612.538, + "framework": 871709.282, + "kernel_overhead": 355534.581, + "profiling_overhead": 48524.934, + "profiling_runs": 409037.229, "runtimes": [ - 8132.608 + 8950.4 ], - "search_algorithm": 43.786, - "validation": 35.064 + "search_algorithm": 25.598, + "validation": 15.716 }, - "timestamp": "2026-03-05 08:58:38 UTC" + "timestamp": "2026-03-13 09:40:4 UTC" }, { "compilation_data": { @@ -12872,14 +12872,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -12887,49 +12887,49 @@ { "name": "time", "unit": "", - "value": 9279.904 + "value": 9336.512 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.001251512436616 + "value": 6.125476732955249 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 17808.0 + "value": 19928.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2199888.0 + "value": 2196244.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 72.82215886795757 + "value": 74.00556621893195 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 98703595.0 + "value": 103813504.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138427456.0 + "value": 138425220.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.28067120444000637 + "value": 0.2843989254953286 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -12941,7 +12941,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.013003059020427754 + "value": 0.013000637484058403 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -12971,13 +12971,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.69117901895379 + "value": 86.84572628252825 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.97198595007198 + "value": 98.69974442395062 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -13037,7 +13037,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.547234655300931 + "value": 7.489273183713623 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -13049,13 +13049,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 18.121006473154637 + "value": 17.984042223376264 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.367976368873277 + "value": 9.297170265624814 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -13067,7 +13067,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.824967123982642 + "value": 23.644940663651642 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -13086,19 +13086,19 @@ "time" ], "times": { - "compilation_time": 23771.51, - "data": 78763.167, - "framework": 895898.473, - "kernel_overhead": 346061.35, - "profiling_overhead": 65423.325, - "profiling_runs": 405650.631, + "compilation_time": 15182.617, + "data": 58822.988, + "framework": 859840.7960000001, + "kernel_overhead": 346499.308, + "profiling_overhead": 48404.961, + "profiling_runs": 406113.539, "runtimes": [ - 9279.904 + 9336.512 ], - "search_algorithm": 39.446, - "validation": 35.495 + "search_algorithm": 26.137, + "validation": 17.338 }, - "timestamp": "2026-03-05 08:58:38 UTC" + "timestamp": "2026-03-13 09:40:4 UTC" }, { "compilation_data": { @@ -13119,14 +13119,14 @@ "registers": 23 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -13134,49 +13134,49 @@ { "name": "time", "unit": "", - "value": 3399.264 + "value": 3461.92 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.509073742158495 + "value": 14.05185274419329 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6192.0 + "value": 8648.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1844360.0 + "value": 1839020.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8172343892091058 + "value": 1.8090853378049891 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 62920.0 + "value": 67242.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100417.0 + "value": 2100872.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.060097827220842 + "value": 6.059403960102685 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -13188,7 +13188,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.28394320581307164 + "value": 0.2839316422093916 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -13218,13 +13218,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.19408973122896 + "value": 98.30881807718194 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94514493916526 + "value": 99.95083766968558 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -13284,7 +13284,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.3836024506563 + "value": 41.37960786170656 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -13296,13 +13296,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.48623761390381 + "value": 48.48150157301422 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3618387408077837 + "value": 3.3615103629726653 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -13314,7 +13314,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.59777216452915 + "value": 87.58930879836447 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -13333,19 +13333,19 @@ "time" ], "times": { - "compilation_time": 23406.721, - "data": 77242.536, - "framework": 1386886.187, - "kernel_overhead": 608250.803, - "profiling_overhead": 63892.4, - "profiling_runs": 637500.448, + "compilation_time": 15450.728, + "data": 57292.503, + "framework": 1351847.705, + "kernel_overhead": 609429.189, + "profiling_overhead": 46959.469, + "profiling_runs": 638166.544, "runtimes": [ - 3399.264 + 3461.92 ], - "search_algorithm": 56.615, - "validation": 28.056 + "search_algorithm": 25.87, + "validation": 17.871 }, - "timestamp": "2026-03-05 08:58:39 UTC" + "timestamp": "2026-03-13 09:40:5 UTC" }, { "compilation_data": { @@ -13366,14 +13366,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -13381,49 +13381,49 @@ { "name": "time", "unit": "", - "value": 4047.936 + "value": 4115.776 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.477310173457559 + "value": 11.764146304638787 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7644.0 + "value": 4924.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838024.0 + "value": 1840176.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5318953586647863 + "value": 1.5221692651724867 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 75340.0 + "value": 73101.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103183.0 + "value": 2102870.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.539986567751393 + "value": 2.539780593394104 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -13435,7 +13435,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11902664911553117 + "value": 0.11901919581650626 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -13465,13 +13465,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.94623423626844 + "value": 98.97656701612499 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96343575658179 + "value": 99.9656032780645 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -13531,7 +13531,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.970317759883955 + "value": 40.96704260326837 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -13543,13 +13543,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.642623565913496 + "value": 40.639197390424 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4883773278532773 + "value": 1.488251857559473 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -13561,7 +13561,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.27550404250832 + "value": 90.26797435333168 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -13580,19 +13580,19 @@ "time" ], "times": { - "compilation_time": 22972.043, - "data": 78807.527, - "framework": 1972599.514, - "kernel_overhead": 898065.971, - "profiling_overhead": 65225.148, - "profiling_runs": 930500.868, + "compilation_time": 14668.051, + "data": 57190.325, + "framework": 1940139.5839999998, + "kernel_overhead": 901936.975, + "profiling_overhead": 47184.013, + "profiling_runs": 933828.271, "runtimes": [ - 4047.936 + 4115.776 ], - "search_algorithm": 44.179, - "validation": 35.973 + "search_algorithm": 30.692, + "validation": 21.156 }, - "timestamp": "2026-03-05 08:58:40 UTC" + "timestamp": "2026-03-13 09:40:6 UTC" }, { "compilation_data": { @@ -13613,14 +13613,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -13628,49 +13628,49 @@ { "name": "time", "unit": "", - "value": 8093.376 + "value": 8468.192 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.424993586454592 + "value": 6.182997552635893 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 17016.0 + "value": 24488.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2000712.0 + "value": 2003092.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 55.89334807060442 + "value": 55.791246828230165 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 21186501.0 + "value": 20951393.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138418150.0 + "value": 138423538.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6337007595617721 + "value": 0.6366516220328106 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -13682,7 +13682,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029633363810746407 + "value": 0.029767177215260704 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -13712,13 +13712,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.4125432429761 + "value": 90.31836536358495 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.52415486914897 + "value": 99.86633219006991 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -13778,7 +13778,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.026673073664881 + "value": 9.036016762294514 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -13790,13 +13790,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.32643203190243 + "value": 20.348258717404438 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.592746313465342 + "value": 5.598751849246778 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -13808,7 +13808,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.170363069145225 + "value": 20.192081444624687 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -13827,19 +13827,19 @@ "time" ], "times": { - "compilation_time": 23685.444, - "data": 78792.443, - "framework": 588432.939, - "kernel_overhead": 195792.933, - "profiling_overhead": 65366.243, - "profiling_runs": 248481.32, + "compilation_time": 14839.361, + "data": 58465.931, + "framework": 547386.1529999999, + "kernel_overhead": 193680.853, + "profiling_overhead": 48380.678, + "profiling_runs": 246858.691, "runtimes": [ - 8093.376 + 8468.192 ], - "search_algorithm": 43.401, - "validation": 29.755 + "search_algorithm": 25.264, + "validation": 15.059 }, - "timestamp": "2026-03-05 08:58:41 UTC" + "timestamp": "2026-03-13 09:40:7 UTC" }, { "compilation_data": { @@ -13860,14 +13860,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -13875,49 +13875,49 @@ { "name": "time", "unit": "", - "value": 9380.8 + "value": 9451.104 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.935433393469489 + "value": 5.889228285100796 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16336.0 + "value": 7204.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2196332.0 + "value": 2197512.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.83186492217116 + "value": 81.77222747069209 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 133023845.0 + "value": 133094342.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138417382.0 + "value": 138416387.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.2742172787979654 + "value": 0.2750836473069054 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -13929,7 +13929,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012726487829270685 + "value": 0.012764518361938332 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -13959,13 +13959,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 99.0298395274492 + "value": 98.96076254901725 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.52937991741578 + "value": 98.93603070140904 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -14025,7 +14025,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.274085888799478 + "value": 7.265474455786407 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -14037,13 +14037,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.635245511671922 + "value": 17.615243182837332 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.708042228274718 + "value": 4.702702251082183 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -14055,7 +14055,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.573110907051056 + "value": 16.554364724951643 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -14074,19 +14074,19 @@ "time" ], "times": { - "compilation_time": 23132.867, - "data": 77279.863, - "framework": 571764.99, - "kernel_overhead": 185232.692, - "profiling_overhead": 63943.809, - "profiling_runs": 245308.626, + "compilation_time": 14409.408, + "data": 58383.301, + "framework": 533165.28, + "kernel_overhead": 183449.556, + "profiling_overhead": 48296.547, + "profiling_runs": 243035.876, "runtimes": [ - 9380.8 + 9451.104 ], - "search_algorithm": 46.325, - "validation": 30.417 + "search_algorithm": 22.1, + "validation": 14.606 }, - "timestamp": "2026-03-05 08:58:41 UTC" + "timestamp": "2026-03-13 09:40:7 UTC" }, { "compilation_data": { @@ -14107,14 +14107,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -14122,49 +14122,49 @@ { "name": "time", "unit": "", - "value": 2601.792 + "value": 2576.864 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 24.50188776203581 + "value": 18.790507597316136 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6040.0 + "value": 2892.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840140.0 + "value": 1839624.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.4093822850089035 + "value": 2.410230107998907 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 50406.0 + "value": 48351.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100392.0 + "value": 2103421.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.067505455968158 + "value": 4.066894538465361 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -14176,7 +14176,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.19058289704615122 + "value": 0.1905697738472762 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -14206,13 +14206,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.72349072947185 + "value": 98.72867425644412 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94871362166752 + "value": 99.95653446082463 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -14272,7 +14272,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.254681124120026 + "value": 45.248249348924325 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -14284,13 +14284,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.08567561092036 + "value": 65.07610181839995 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.3835086283296025 + "value": 2.38315802557617 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -14302,7 +14302,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.70221661719883 + "value": 91.6888525085137 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -14321,19 +14321,19 @@ "time" ], "times": { - "compilation_time": 23469.892, - "data": 77637.294, - "framework": 1112721.398, - "kernel_overhead": 473300.913, - "profiling_overhead": 63343.769, - "profiling_runs": 498439.422, + "compilation_time": 14945.79, + "data": 58191.565, + "framework": 1075123.785, + "kernel_overhead": 472188.01, + "profiling_overhead": 48207.024, + "profiling_runs": 496537.186, "runtimes": [ - 2601.792 + 2576.864 ], - "search_algorithm": 44.189, - "validation": 36.755 + "search_algorithm": 26.198, + "validation": 15.752 }, - "timestamp": "2026-03-05 08:58:42 UTC" + "timestamp": "2026-03-13 09:40:7 UTC" }, { "compilation_data": { @@ -14354,14 +14354,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -14369,49 +14369,49 @@ { "name": "time", "unit": "", - "value": 3297.536 + "value": 3344.128 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.359124331550802 + "value": 14.651892212338943 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 11464.0 + "value": 2928.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842056.0 + "value": 1839844.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9206133388378843 + "value": 1.9126783579369546 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 68513.0 + "value": 57133.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108741.0 + "value": 2100434.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5936443049944211 + "value": 1.5934887398812632 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -14423,7 +14423,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0746348703978352 + "value": 0.0746592940613781 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -14453,13 +14453,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.70011018107981 + "value": 98.68769386326086 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90539257285859 + "value": 99.94808921907156 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -14519,7 +14519,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.376938233219505 + "value": 45.37259971070455 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -14531,13 +14531,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.99898702107766 + "value": 50.993882735321094 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0334267389525011 + "value": 1.033323307380774 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -14549,7 +14549,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.17550193350114 + "value": 95.16607687860301 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -14568,19 +14568,19 @@ "time" ], "times": { - "compilation_time": 23536.751, - "data": 78036.38, - "framework": 1868428.9, - "kernel_overhead": 848790.187, - "profiling_overhead": 64279.852, - "profiling_runs": 877322.481, + "compilation_time": 16413.005, + "data": 57255.037, + "framework": 1832023.54, + "kernel_overhead": 849708.645, + "profiling_overhead": 47064.459, + "profiling_runs": 877995.399, "runtimes": [ - 3297.536 + 3344.128 ], - "search_algorithm": 44.023, - "validation": 27.496 + "search_algorithm": 29.642, + "validation": 17.266 }, - "timestamp": "2026-03-05 08:58:43 UTC" + "timestamp": "2026-03-13 09:40:8 UTC" }, { "compilation_data": { @@ -14601,14 +14601,14 @@ "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -14616,49 +14616,49 @@ { "name": "time", "unit": "", - "value": 5451.712 + "value": 5483.008 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.597336379313123 + "value": 8.787379741405724 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3760.0 + "value": 492.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838424.0 + "value": 1835044.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.164163135028142 + "value": 1.1499018329281727 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 97656.0 + "value": 89759.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105050.0 + "value": 2099490.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.476759406602004 + "value": 0.4767274092027408 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -14670,7 +14670,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.022341655697049476 + "value": 0.022341524086119304 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -14700,13 +14700,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.90061101697658 + "value": 73.89633961111531 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9677684162297 + "value": 99.97266320076078 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -14766,7 +14766,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.99364301998961 + "value": 42.99122904613712 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -14778,13 +14778,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.513642275209524 + "value": 30.51196854783408 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3687561749567557 + "value": 0.36873594802680343 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -14796,7 +14796,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.94793209624359 + "value": 96.94264797541064 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -14815,19 +14815,19 @@ "time" ], "times": { - "compilation_time": 24764.408, - "data": 77422.538, - "framework": 3095903.091, - "kernel_overhead": 1457488.694, - "profiling_overhead": 64440.561, - "profiling_runs": 1496551.298, + "compilation_time": 15016.076, + "data": 57641.365, + "framework": 3056821.1859999998, + "kernel_overhead": 1456557.317, + "profiling_overhead": 47627.972, + "profiling_runs": 1494994.532, "runtimes": [ - 5451.712 + 5483.008 ], - "search_algorithm": 46.412, - "validation": 27.653 + "search_algorithm": 26.033, + "validation": 14.837 }, - "timestamp": "2026-03-05 08:58:44 UTC" + "timestamp": "2026-03-13 09:40:10 UTC" }, { "compilation_data": { @@ -14848,14 +14848,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -14863,49 +14863,49 @@ { "name": "time", "unit": "", - "value": 3236.864 + "value": 3279.456 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.265996953919647 + "value": 14.76675380715869 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 848.0 + "value": 4212.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836900.0 + "value": 1837696.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8952988077900839 + "value": 1.8986124633659907 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 55545.0 + "value": 58717.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100018.0 + "value": 2099784.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5876859733886626 + "value": 1.5875429860265615 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -14917,7 +14917,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07440067134980788 + "value": 0.07439297349647399 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -14947,13 +14947,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.72689018453617 + "value": 98.73542220225696 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96462138486484 + "value": 99.96446536800639 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -15013,7 +15013,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.0326671277218 + "value": 42.0283455890491 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -15025,13 +15025,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.80883376988963 + "value": 50.80365613250557 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0295735358644627 + "value": 1.0294686179194243 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -15043,7 +15043,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.26341767533044 + "value": 89.25441863024241 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -15062,19 +15062,19 @@ "time" ], "times": { - "compilation_time": 23512.381, - "data": 77507.289, - "framework": 1694643.151, - "kernel_overhead": 762262.941, - "profiling_overhead": 64409.014, - "profiling_runs": 790463.907, + "compilation_time": 14823.022, + "data": 57154.503, + "framework": 1655243.3960000002, + "kernel_overhead": 761634.685, + "profiling_overhead": 47019.942, + "profiling_runs": 789434.266, "runtimes": [ - 3236.864 + 3279.456 ], - "search_algorithm": 47.096, - "validation": 30.354 + "search_algorithm": 25.079, + "validation": 16.507 }, - "timestamp": "2026-03-05 08:58:45 UTC" + "timestamp": "2026-03-13 09:40:11 UTC" }, { "compilation_data": { @@ -15095,14 +15095,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -15110,49 +15110,49 @@ { "name": "time", "unit": "", - "value": 6181.76 + "value": 6277.28 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.173916819077595 + "value": 7.767065246516614 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 848.0 + "value": 13844.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836200.0 + "value": 1841304.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.026428799161382 + "value": 1.028680540143125 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 104275.0 + "value": 117244.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102966.0 + "value": 2111279.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.41935248933971053 + "value": 0.41932640369727187 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -15164,7 +15164,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01965289230502901 + "value": 0.019651985848523106 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -15194,13 +15194,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.00415189596089 + "value": 82.01021625195445 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97307413410681 + "value": 99.97501890432119 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -15260,7 +15260,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.84809606037587 + "value": 42.8453442859236 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -15272,13 +15272,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.83997585637113 + "value": 26.83821582555742 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.32436005978768817 + "value": 0.3243387898840557 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -15290,7 +15290,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.65858731422863 + "value": 93.65248504664707 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -15309,19 +15309,19 @@ "time" ], "times": { - "compilation_time": 23230.539, - "data": 76913.032, - "framework": 3582615.74, - "kernel_overhead": 1699810.697, - "profiling_overhead": 63537.714, - "profiling_runs": 1742354.297, + "compilation_time": 14433.998, + "data": 58524.528, + "framework": 3545812.423, + "kernel_overhead": 1698106.457, + "profiling_overhead": 48611.943, + "profiling_runs": 1740569.495, "runtimes": [ - 6181.76 + 6277.28 ], - "search_algorithm": 46.441, - "validation": 27.481 + "search_algorithm": 32.669, + "validation": 17.029 }, - "timestamp": "2026-03-05 08:58:47 UTC" + "timestamp": "2026-03-13 09:40:13 UTC" }, { "compilation_data": { @@ -15342,14 +15342,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 16, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -15357,49 +15357,49 @@ { "name": "time", "unit": "", - "value": 5798.496 + "value": 5897.888 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.71859025945962 + "value": 8.229411314204023 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6384.0 + "value": 17000.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1834840.0 + "value": 1839808.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.075070913951803 + "value": 1.079994925131695 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 101825.0 + "value": 115550.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100504.0 + "value": 2106056.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4409068398994503 + "value": 0.4408732139948083 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -15411,7 +15411,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.020657350237572977 + "value": 0.020662359788787905 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -15441,13 +15441,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.02275930736741 + "value": 82.01511674321664 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94463645635663 + "value": 99.97426768598284 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -15507,7 +15507,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.94866102540273 + "value": 43.94643659146637 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -15519,13 +15519,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.219792334744948 + "value": 28.218269778748095 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.34103508802975463 + "value": 0.3410166880000075 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -15537,7 +15537,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.50506726623156 + "value": 94.50001243479227 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -15556,19 +15556,19 @@ "time" ], "times": { - "compilation_time": 23683.101, - "data": 78524.07, - "framework": 3485948.987, - "kernel_overhead": 1650338.161, - "profiling_overhead": 65473.719, - "profiling_runs": 1691613.037, + "compilation_time": 15395.454, + "data": 58685.251, + "framework": 3454373.142, + "kernel_overhead": 1653085.528, + "profiling_overhead": 48698.623, + "profiling_runs": 1693903.74, "runtimes": [ - 5798.496 + 5897.888 ], - "search_algorithm": 48.541, - "validation": 33.342 + "search_algorithm": 22.937, + "validation": 14.802 }, - "timestamp": "2026-03-05 08:58:49 UTC" + "timestamp": "2026-03-13 09:40:15 UTC" }, { "compilation_data": { @@ -15589,14 +15589,14 @@ "registers": 44 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 1 }, "correctness": 1, "invalidity": "correct", @@ -15604,49 +15604,49 @@ { "name": "time", "unit": "", - "value": 3624.96 + "value": 3685.472 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.12124057908796 + "value": 13.08290244035894 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 664.0 + "value": 512.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837056.0 + "value": 1835800.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.6876085947961486 + "value": 1.6890298963247599 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 61167.0 + "value": 61321.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098982.0 + "value": 2099466.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.575471708522592 + "value": 22.572996507534047 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -15658,7 +15658,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0579003620013563 + "value": 1.0577727558631234 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -15688,13 +15688,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 71.60134550855534 + "value": 72.43716671246821 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96544476254897 + "value": 99.9555866838285 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -15754,7 +15754,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.12501015969425 + "value": 40.12449724812488 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -15766,13 +15766,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.152684729481656 + "value": 45.151690946750215 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.993681881268566 + "value": 11.993417907730528 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -15784,7 +15784,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.411629162816965 + "value": 57.41037078258023 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -15803,19 +15803,19 @@ "time" ], "times": { - "compilation_time": 23596.529, - "data": 77142.193, - "framework": 282727.81799999997, - "kernel_overhead": 55563.932, - "profiling_overhead": 64250.378, - "profiling_runs": 85771.315, + "compilation_time": 22107.845, + "data": 58026.889, + "framework": 245074.51, + "kernel_overhead": 54245.581, + "profiling_overhead": 48129.576, + "profiling_runs": 84672.464, "runtimes": [ - 3624.96 + 3685.472 ], - "search_algorithm": 45.635, - "validation": 27.32 + "search_algorithm": 23.766, + "validation": 18.893 }, - "timestamp": "2026-03-05 08:58:49 UTC" + "timestamp": "2026-03-13 09:40:15 UTC" }, { "compilation_data": { @@ -15836,14 +15836,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -15851,49 +15851,49 @@ { "name": "time", "unit": "", - "value": 2133.312 + "value": 2134.72 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.95872224763014 + "value": 22.850668393947103 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4916.0 + "value": 892.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839736.0 + "value": 1839212.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.932265842410314 + "value": 2.933742597674666 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 39388.0 + "value": 36054.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099746.0 + "value": 2100201.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.913038930848913 + "value": 19.907506338501875 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -15905,7 +15905,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9328880326533587 + "value": 0.9327310078123242 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -15935,13 +15935,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.21472624907025 + "value": 94.30415073401413 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92750423981782 + "value": 99.9333528901577 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -16001,7 +16001,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.490437537584356 + "value": 50.47889305982858 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -16013,13 +16013,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.66419861946883 + "value": 79.6461278422932 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.735995517076855 + "value": 10.733560197496544 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -16031,7 +16031,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.24213631397151 + "value": 76.22488065503386 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -16050,19 +16050,19 @@ "time" ], "times": { - "compilation_time": 23950.702, - "data": 77961.772, - "framework": 294393.065, - "kernel_overhead": 64428.428, - "profiling_overhead": 64746.212, - "profiling_runs": 87256.653, + "compilation_time": 14637.747, + "data": 56816.215, + "framework": 246647.953, + "kernel_overhead": 60282.096, + "profiling_overhead": 46977.306, + "profiling_runs": 82572.336, "runtimes": [ - 2133.312 + 2134.72 ], - "search_algorithm": 42.817, - "validation": 29.385 + "search_algorithm": 33.888, + "validation": 15.43 }, - "timestamp": "2026-03-05 08:58:49 UTC" + "timestamp": "2026-03-13 09:40:15 UTC" }, { "compilation_data": { @@ -16083,14 +16083,14 @@ "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -16098,49 +16098,49 @@ { "name": "time", "unit": "", - "value": 1844.96 + "value": 1876.416 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 34.02317096285065 + "value": 25.737377726607402 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 432.0 + "value": 5992.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837712.0 + "value": 1839588.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.330012303596684 + "value": 3.340883091060524 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 32434.0 + "value": 39598.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099170.0 + "value": 2104158.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.292922785377108 + "value": 11.290846772321125 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -16152,7 +16152,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.528496466690594 + "value": 0.528871553463349 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -16182,13 +16182,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.87232169720669 + "value": 95.87348230235104 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.8199415107782 + "value": 99.90100907868191 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -16248,7 +16248,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.84133442200045 + "value": 45.83718702765216 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -16260,13 +16260,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.35943013997418 + "value": 90.35018360357333 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.265155800720866 + "value": 6.264514683450885 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -16278,7 +16278,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.44732960844479 + "value": 66.44057444073563 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -16297,19 +16297,19 @@ "time" ], "times": { - "compilation_time": 23539.934, - "data": 77830.388, - "framework": 240272.68800000002, - "kernel_overhead": 38048.799, - "profiling_overhead": 64598.81, - "profiling_runs": 59794.691, + "compilation_time": 15381.889, + "data": 57416.347, + "framework": 196555.738, + "kernel_overhead": 35105.083, + "profiling_overhead": 47813.69, + "profiling_runs": 56220.618, "runtimes": [ - 1844.96 + 1876.416 ], - "search_algorithm": 38.978, - "validation": 28.529 + "search_algorithm": 23.707, + "validation": 15.802 }, - "timestamp": "2026-03-05 08:58:49 UTC" + "timestamp": "2026-03-13 09:40:15 UTC" }, { "compilation_data": { @@ -16330,14 +16330,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -16345,49 +16345,49 @@ { "name": "time", "unit": "", - "value": 1827.168 + "value": 1840.096 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 34.963153825549114 + "value": 26.559723353448593 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 112.0 + "value": 200.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837708.0 + "value": 1836356.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4202413703640913 + "value": 3.4084468916351294 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 30950.0 + "value": 30798.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099159.0 + "value": 2098903.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.798459706722612 + "value": 5.797118836206456 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -16399,7 +16399,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2715981955783292 + "value": 0.2715511397556902 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -16429,13 +16429,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.32521897552597 + "value": 98.19979726666077 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91380112759225 + "value": 99.91765816091504 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -16495,7 +16495,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.63274187476548 + "value": 42.624645236533055 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -16507,13 +16507,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.7854974766525 + "value": 92.7658408026351 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3979064017328793 + "value": 3.397186552830875 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -16525,7 +16525,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.07617319170131 + "value": 60.06348851773476 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -16544,19 +16544,19 @@ "time" ], "times": { - "compilation_time": 23490.323, - "data": 77227.873, - "framework": 228069.61599999998, - "kernel_overhead": 32559.838, - "profiling_overhead": 64309.661, - "profiling_runs": 53972.244, + "compilation_time": 15126.398, + "data": 56920.526, + "framework": 184441.12399999998, + "kernel_overhead": 29706.764, + "profiling_overhead": 47306.241, + "profiling_runs": 50507.593, "runtimes": [ - 1827.168 + 1840.096 ], - "search_algorithm": 33.564, - "validation": 26.993 + "search_algorithm": 27.424, + "validation": 14.372 }, - "timestamp": "2026-03-05 08:58:50 UTC" + "timestamp": "2026-03-13 09:40:15 UTC" }, { "compilation_data": { @@ -16577,14 +16577,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -16592,49 +16592,49 @@ { "name": "time", "unit": "", - "value": 1781.696 + "value": 1767.04 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.1980473454436 + "value": 27.1134111403344 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5712.0 + "value": 7272.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840956.0 + "value": 1840084.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5556317230549723 + "value": 3.5414489971724055 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 36716.0 + "value": 37526.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103924.0 + "value": 2100594.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.996106443989278 + "value": 2.995311185205292 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -16646,7 +16646,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.14031685253587198 + "value": 0.14028732872513203 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -16676,13 +16676,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.33963732657661 + "value": 98.24925455317026 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89617118153573 + "value": 99.89722944227796 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -16742,7 +16742,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.53978342448487 + "value": 39.53129300756368 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -16754,13 +16754,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.88919861978373 + "value": 95.8680071619942 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.943067257188 + "value": 1.9426378404408005 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -16772,7 +16772,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.363505030581685 + "value": 58.350653871482905 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -16791,19 +16791,19 @@ "time" ], "times": { - "compilation_time": 24030.089, - "data": 77746.303, - "framework": 235694.718, - "kernel_overhead": 36053.932, - "profiling_overhead": 64475.073, - "profiling_runs": 57419.41, + "compilation_time": 15106.288, + "data": 57139.554, + "framework": 191262.272, + "kernel_overhead": 33291.346, + "profiling_overhead": 47043.482, + "profiling_runs": 53787.89, "runtimes": [ - 1781.696 + 1767.04 ], - "search_algorithm": 37.714, - "validation": 26.897 + "search_algorithm": 28.101, + "validation": 17.346 }, - "timestamp": "2026-03-05 08:58:50 UTC" + "timestamp": "2026-03-13 09:40:15 UTC" }, { "compilation_data": { @@ -16824,14 +16824,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -16839,49 +16839,49 @@ { "name": "time", "unit": "", - "value": 1753.888 + "value": 1817.376 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.70606733488811 + "value": 27.20618573813408 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5092.0 + "value": 1500.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837044.0 + "value": 1834208.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5303643473386868 + "value": 3.495134394639864 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 34342.0 + "value": 29967.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103624.0 + "value": 2099415.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4943946447970224 + "value": 1.4940856379162875 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -16893,7 +16893,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07000559794112443 + "value": 0.06999054148004706 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -16923,13 +16923,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.69389834862477 + "value": 81.71369419889523 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92762976188185 + "value": 99.92554996799505 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -16989,7 +16989,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.93515283515334 + "value": 37.928541787347484 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -17001,13 +17001,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.65019866548326 + "value": 95.63161707027325 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1559289145364797 + "value": 1.1557043566842105 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -17019,7 +17019,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 53.51835083010823 + "value": 53.50795402649145 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -17038,19 +17038,19 @@ "time" ], "times": { - "compilation_time": 23732.615, - "data": 78067.903, - "framework": 214001.34500000003, - "kernel_overhead": 25279.917, - "profiling_overhead": 64145.735, - "profiling_runs": 46507.79, + "compilation_time": 15312.68, + "data": 56544.177, + "framework": 169856.877, + "kernel_overhead": 22879.209, + "profiling_overhead": 46980.674, + "profiling_runs": 43452.817, "runtimes": [ - 1753.888 + 1817.376 ], - "search_algorithm": 40.512, - "validation": 29.52 + "search_algorithm": 24.654, + "validation": 15.354 }, - "timestamp": "2026-03-05 08:58:50 UTC" + "timestamp": "2026-03-13 09:40:15 UTC" }, { "compilation_data": { @@ -17071,14 +17071,14 @@ "registers": 19 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -17086,49 +17086,49 @@ { "name": "time", "unit": "", - "value": 5210.816 + "value": 5190.304 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.066314593221472 + "value": 9.266893672545894 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9412.0 + "value": 5048.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838908.0 + "value": 1840336.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.2096921341787783 + "value": 1.2080621554872668 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 94728.0 + "value": 90784.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100770.0 + "value": 2101742.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.955461407999722 + "value": 7.95471248036548 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -17140,7 +17140,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.37280334661389636 + "value": 0.3727828476204605 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -17170,13 +17170,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.35592517258348 + "value": 97.1619163672518 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96727911504107 + "value": 99.96924453339801 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -17236,7 +17236,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.266811313524435 + "value": 36.263770950637145 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -17248,13 +17248,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.822965000154042 + "value": 31.820589567743024 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.288641767598884 + "value": 4.288321640965368 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -17266,7 +17266,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.80990822108174 + "value": 84.80359262102318 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -17285,19 +17285,19 @@ "time" ], "times": { - "compilation_time": 23335.297, - "data": 78746.469, - "framework": 2148611.875, - "kernel_overhead": 982973.6, - "profiling_overhead": 65515.478, - "profiling_runs": 1021376.328, + "compilation_time": 15021.553, + "data": 57555.909, + "framework": 2097297.394, + "kernel_overhead": 976845.126, + "profiling_overhead": 47941.592, + "profiling_runs": 1014954.767, "runtimes": [ - 5210.816 + 5190.304 ], - "search_algorithm": 57.373, - "validation": 29.206 + "search_algorithm": 23.518, + "validation": 14.73 }, - "timestamp": "2026-03-05 08:58:51 UTC" + "timestamp": "2026-03-13 09:40:17 UTC" }, { "compilation_data": { @@ -17318,14 +17318,14 @@ "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -17333,49 +17333,49 @@ { "name": "time", "unit": "", - "value": 5781.28 + "value": 5794.528 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.072896629110561 + "value": 8.26871203417209 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 13904.0 + "value": 18968.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842856.0 + "value": 1841860.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1183377259524692 + "value": 1.1119226698252491 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 112627.0 + "value": 115403.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2113504.0 + "value": 2103678.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.6249279096530853 + "value": 3.6246085756153956 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -17387,7 +17387,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1698645579666433 + "value": 0.16987257724991048 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -17417,13 +17417,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.69086269060774 + "value": 98.52870575941368 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96272495122425 + "value": 99.9756970488545 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -17483,7 +17483,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.82979358778209 + "value": 33.82731188552912 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -17495,13 +17495,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.001028040321287 + "value": 28.99863404118194 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.0108134676394642 + "value": 2.010647477464764 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -17513,7 +17513,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.02099289599884 + "value": 85.01398823736768 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -17532,19 +17532,19 @@ "time" ], "times": { - "compilation_time": 23477.497, - "data": 78872.385, - "framework": 2737648.452, - "kernel_overhead": 1276075.27, - "profiling_overhead": 65816.104, - "profiling_runs": 1316884.693, + "compilation_time": 15314.807, + "data": 58277.336, + "framework": 2688842.41, + "kernel_overhead": 1271021.768, + "profiling_overhead": 47982.609, + "profiling_runs": 1311560.697, "runtimes": [ - 5781.28 + 5794.528 ], - "search_algorithm": 47.14, - "validation": 32.859 + "search_algorithm": 25.571, + "validation": 17.514 }, - "timestamp": "2026-03-05 08:58:53 UTC" + "timestamp": "2026-03-13 09:40:18 UTC" }, { "compilation_data": { @@ -17565,14 +17565,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -17580,49 +17580,49 @@ { "name": "time", "unit": "", - "value": 8029.343 + "value": 8054.335 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.961592813766259 + "value": 6.207643226186194 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6188.0 + "value": 29324.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1910240.0 + "value": 1914596.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.18159785300471 + "value": 49.158568184791925 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 889462.0 + "value": 918706.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415206.0 + "value": 138421298.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2856761259097234 + "value": 1.2841558792481054 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -17634,7 +17634,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06000813391502677 + "value": 0.060675301589652904 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -17664,13 +17664,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.56317908700358 + "value": 97.5463892602469 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.26879055359261 + "value": 100.65379027753345 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -17730,7 +17730,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.682569914067033 + "value": 10.760639964410403 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -17742,13 +17742,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.427868196317085 + "value": 20.575979191142526 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.061770424665843 + "value": 11.141973106922395 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -17760,7 +17760,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.078553575300827 + "value": 30.29665175114155 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -17779,19 +17779,19 @@ "time" ], "times": { - "compilation_time": 23490.085, - "data": 79529.533, - "framework": 949437.9199999999, - "kernel_overhead": 375216.291, - "profiling_overhead": 66476.082, - "profiling_runs": 428216.014, + "compilation_time": 14428.673, + "data": 57988.75, + "framework": 905670.345, + "kernel_overhead": 373666.562, + "profiling_overhead": 47968.085, + "profiling_runs": 426046.948, "runtimes": [ - 8029.343 + 8054.335 ], - "search_algorithm": 41.515, - "validation": 34.907 + "search_algorithm": 25.182, + "validation": 15.502 }, - "timestamp": "2026-03-05 08:58:53 UTC" + "timestamp": "2026-03-13 09:40:19 UTC" }, { "compilation_data": { @@ -17812,14 +17812,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -17827,49 +17827,49 @@ { "name": "time", "unit": "", - "value": 8130.144 + "value": 8318.847 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.257344957358661 + "value": 6.333929797286383 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6876.0 + "value": 10184.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1998372.0 + "value": 1997540.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 53.99939718695646 + "value": 53.795495328351706 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 15892944.0 + "value": 15905517.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415583.0 + "value": 138420378.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6333093117023759 + "value": 0.6362119932485598 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -17881,7 +17881,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029740213139162266 + "value": 0.029618766130121398 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -17911,13 +17911,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.08222304923612 + "value": 90.15398668689765 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.1380743639169 + "value": 99.11602510168737 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -17977,7 +17977,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.161619747373893 + "value": 9.218365787293479 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -17989,13 +17989,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.27465806450921 + "value": 20.400075894977874 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.647165404482253 + "value": 10.713028137230813 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -18007,7 +18007,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.722054551437747 + "value": 27.893556429453987 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -18026,19 +18026,19 @@ "time" ], "times": { - "compilation_time": 24488.927, - "data": 80663.798, - "framework": 926832.254, - "kernel_overhead": 362888.094, - "profiling_overhead": 67083.902, - "profiling_runs": 416196.46, + "compilation_time": 14698.911, + "data": 57879.109, + "framework": 876435.503, + "kernel_overhead": 358624.247, + "profiling_overhead": 47921.999, + "profiling_runs": 412010.148, "runtimes": [ - 8130.144 + 8318.847 ], - "search_algorithm": 43.027, - "validation": 28.772 + "search_algorithm": 38.706, + "validation": 18.207 }, - "timestamp": "2026-03-05 08:58:54 UTC" + "timestamp": "2026-03-13 09:40:19 UTC" }, { "compilation_data": { @@ -18059,14 +18059,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -18074,49 +18074,49 @@ { "name": "time", "unit": "", - "value": 9525.088 + "value": 10489.632 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.888665007196603 + "value": 5.781606328392246 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 18516.0 + "value": 7372.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2198368.0 + "value": 2199296.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 76.4585764634975 + "value": 76.45174849532879 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 118029063.0 + "value": 118575942.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425319.0 + "value": 138416578.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.27324321451550376 + "value": 0.27533385236589153 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -18128,7 +18128,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01260033173080384 + "value": 0.012671866050455931 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -18158,13 +18158,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.80549509476032 + "value": 94.71130736355947 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.58778765604728 + "value": 98.44000768345316 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -18224,7 +18224,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.340224717934657 + "value": 7.31784994267429 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -18236,13 +18236,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.628899410814856 + "value": 17.575497525210665 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.113572876562612 + "value": 9.085965822664448 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -18254,7 +18254,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.178013259556447 + "value": 23.107815810898046 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -18273,19 +18273,19 @@ "time" ], "times": { - "compilation_time": 23382.441, - "data": 80671.0, - "framework": 905944.874, - "kernel_overhead": 348583.718, - "profiling_overhead": 66973.521, - "profiling_runs": 409716.635, + "compilation_time": 14492.104, + "data": 58611.451, + "framework": 861139.37, + "kernel_overhead": 346321.455, + "profiling_overhead": 48270.338, + "profiling_runs": 407936.126, "runtimes": [ - 9525.088 + 10489.632 ], - "search_algorithm": 46.722, - "validation": 29.059 + "search_algorithm": 25.378, + "validation": 19.483 }, - "timestamp": "2026-03-05 08:58:54 UTC" + "timestamp": "2026-03-13 09:40:20 UTC" }, { "compilation_data": { @@ -18306,14 +18306,14 @@ "registers": 23 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -18321,49 +18321,49 @@ { "name": "time", "unit": "", - "value": 3433.952 + "value": 3477.984 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.3361419823811 + "value": 13.967102445292422 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6512.0 + "value": 212.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837124.0 + "value": 1836348.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8161804646778121 + "value": 1.7994892347992841 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 63248.0 + "value": 57534.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100390.0 + "value": 2099099.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.060310164537633 + "value": 6.059674072687125 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -18375,7 +18375,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.28397588423545594 + "value": 0.2839290671804871 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -18405,13 +18405,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.24190216516125 + "value": 97.95052779441636 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95597920982462 + "value": 99.94838560833385 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -18471,7 +18471,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.385627022334006 + "value": 41.379533977945194 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -18483,13 +18483,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.48656174379297 + "value": 48.48225128453636 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3618612146575204 + "value": 3.3615623449239083 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -18501,7 +18501,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.59846233728926 + "value": 87.59069794504867 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -18520,19 +18520,19 @@ "time" ], "times": { - "compilation_time": 23240.691, - "data": 79023.44, - "framework": 1407640.5980000002, - "kernel_overhead": 617034.707, - "profiling_overhead": 65232.126, - "profiling_runs": 646350.325, + "compilation_time": 14521.706, + "data": 57421.939, + "framework": 1364991.584, + "kernel_overhead": 615879.149, + "profiling_overhead": 47037.873, + "profiling_runs": 644652.623, "runtimes": [ - 3433.952 + 3477.984 ], - "search_algorithm": 45.81, - "validation": 25.12 + "search_algorithm": 25.837, + "validation": 16.089 }, - "timestamp": "2026-03-05 08:58:55 UTC" + "timestamp": "2026-03-13 09:40:20 UTC" }, { "compilation_data": { @@ -18553,14 +18553,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -18568,49 +18568,49 @@ { "name": "time", "unit": "", - "value": 4063.68 + "value": 4115.008 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.429209512257955 + "value": 11.735250252986877 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 948.0 + "value": 2784.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838984.0 + "value": 1837844.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5275888398072635 + "value": 1.5190985191007014 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 68848.0 + "value": 71824.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100239.0 + "value": 2099774.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.539983399135371 + "value": 2.5397556486484207 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -18622,7 +18622,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11902843043656156 + "value": 0.11901589314488117 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -18652,13 +18652,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.84177420552778 + "value": 98.8477115980538 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96422920446038 + "value": 99.96176285729769 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -18718,7 +18718,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.97043235080748 + "value": 40.96808555149869 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -18730,13 +18730,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.64290921430275 + "value": 40.63963096047015 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4883877886097197 + "value": 1.48826773536878 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -18748,7 +18748,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.2762260385089 + "value": 90.2689634409765 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -18767,19 +18767,19 @@ "time" ], "times": { - "compilation_time": 23662.394, - "data": 78549.809, - "framework": 1990969.145, - "kernel_overhead": 907576.984, - "profiling_overhead": 65052.088, - "profiling_runs": 939790.264, + "compilation_time": 14393.689, + "data": 57160.139, + "framework": 1945709.062, + "kernel_overhead": 904684.044, + "profiling_overhead": 47114.965, + "profiling_runs": 936749.914, "runtimes": [ - 4063.68 + 4115.008 ], - "search_algorithm": 47.174, - "validation": 24.99 + "search_algorithm": 24.912, + "validation": 16.602 }, - "timestamp": "2026-03-05 08:58:56 UTC" + "timestamp": "2026-03-13 09:40:21 UTC" }, { "compilation_data": { @@ -18800,14 +18800,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -18815,49 +18815,49 @@ { "name": "time", "unit": "", - "value": 8103.072 + "value": 8292.8 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.418377557664186 + "value": 6.369514250128215 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 19748.0 + "value": 5620.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1999348.0 + "value": 1997420.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 56.34887469636585 + "value": 56.36826607304675 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 22681546.0 + "value": 22818711.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138421239.0 + "value": 138423860.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6350268317846489 + "value": 0.6389366602704918 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -18869,7 +18869,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02958363988120223 + "value": 0.02977053718860498 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -18899,13 +18899,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.07325080131201 + "value": 91.90951838698602 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.68810502702553 + "value": 98.70912079509333 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -18965,7 +18965,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.088033249189072 + "value": 9.14357545576909 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -18977,13 +18977,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.4642340837701 + "value": 20.589134239792802 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.630662063576392 + "value": 5.665027902403928 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -18995,7 +18995,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.307171706573023 + "value": 20.431128290977032 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -19014,19 +19014,19 @@ "time" ], "times": { - "compilation_time": 25607.037, - "data": 78557.001, - "framework": 591092.995, - "kernel_overhead": 197488.283, - "profiling_overhead": 64432.515, - "profiling_runs": 250615.196, + "compilation_time": 14672.188, + "data": 58052.427, + "framework": 550303.899, + "kernel_overhead": 195800.001, + "profiling_overhead": 48034.79, + "profiling_runs": 248416.681, "runtimes": [ - 8103.072 + 8292.8 ], - "search_algorithm": 39.189, - "validation": 34.171 + "search_algorithm": 26.662, + "validation": 14.565 }, - "timestamp": "2026-03-05 08:58:56 UTC" + "timestamp": "2026-03-13 09:40:22 UTC" }, { "compilation_data": { @@ -19047,14 +19047,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -19062,49 +19062,49 @@ { "name": "time", "unit": "", - "value": 9451.328 + "value": 9450.464 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.950785726052974 + "value": 6.028753431499242 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 22116.0 + "value": 12060.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2201804.0 + "value": 2197400.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.75745871606856 + "value": 81.82638758017924 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 132964114.0 + "value": 133008102.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138457915.0 + "value": 138417326.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.2746642663928427 + "value": 0.27384396277109796 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -19116,7 +19116,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012739735477556885 + "value": 0.012808863199631511 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -19146,13 +19146,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.81132399471615 + "value": 98.74788394591457 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.73398353830851 + "value": 99.3040329380209 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -19212,7 +19212,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.2663578775552695 + "value": 7.264286335596472 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -19224,13 +19224,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.61701987706072 + "value": 17.6109341898316 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.7031765711830795 + "value": 4.701551888813685 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -19242,7 +19242,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.556038092918072 + "value": 16.550332954734046 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -19261,19 +19261,19 @@ "time" ], "times": { - "compilation_time": 23279.235, - "data": 79947.019, - "framework": 579298.1980000001, - "kernel_overhead": 186136.893, - "profiling_overhead": 66410.338, - "profiling_runs": 246803.948, + "compilation_time": 14740.728, + "data": 58912.704, + "framework": 534258.179, + "kernel_overhead": 183703.68, + "profiling_overhead": 48838.614, + "profiling_runs": 242803.181, "runtimes": [ - 9451.328 + 9450.464 ], - "search_algorithm": 40.862, - "validation": 27.742 + "search_algorithm": 23.36, + "validation": 15.664 }, - "timestamp": "2026-03-05 08:58:57 UTC" + "timestamp": "2026-03-13 09:40:22 UTC" }, { "compilation_data": { @@ -19294,14 +19294,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -19309,49 +19309,49 @@ { "name": "time", "unit": "", - "value": 2546.048 + "value": 2589.088 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 24.708048327776442 + "value": 18.566369949170817 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 424.0 + "value": 10820.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839784.0 + "value": 1840660.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.412894620607961 + "value": 2.415925107142998 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 43747.0 + "value": 56235.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099167.0 + "value": 2102559.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.06746534039022 + "value": 4.06699430780362 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -19363,7 +19363,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.19058351778275434 + "value": 0.19055415545841586 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -19393,13 +19393,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.51582017087915 + "value": 98.57617011699715 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94884262156563 + "value": 99.94819289514302 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -19459,7 +19459,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.25460766142512 + "value": 45.24841739539231 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -19471,13 +19471,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.0858035940318 + "value": 65.07619915787394 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.3835133152111254 + "value": 2.383161590254172 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -19489,7 +19489,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.70252956606136 + "value": 91.68902941293263 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -19508,19 +19508,19 @@ "time" ], "times": { - "compilation_time": 23288.119, - "data": 78376.159, - "framework": 1124358.2689999999, - "kernel_overhead": 478440.128, - "profiling_overhead": 64451.465, - "profiling_runs": 503090.517, + "compilation_time": 16245.419, + "data": 57776.847, + "framework": 1083578.5869999998, + "kernel_overhead": 476823.378, + "profiling_overhead": 47514.37, + "profiling_runs": 501463.992, "runtimes": [ - 2546.048 + 2589.088 ], - "search_algorithm": 44.897, - "validation": 24.582 + "search_algorithm": 25.207, + "validation": 16.091 }, - "timestamp": "2026-03-05 08:58:57 UTC" + "timestamp": "2026-03-13 09:40:23 UTC" }, { "compilation_data": { @@ -19541,14 +19541,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -19556,49 +19556,49 @@ { "name": "time", "unit": "", - "value": 3253.888 + "value": 3303.712 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.34830898304631 + "value": 14.425315015750787 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4792.0 + "value": 7096.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1844572.0 + "value": 1839252.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.939106682011603 + "value": 1.908622369561957 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 59035.0 + "value": 64433.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2135552.0 + "value": 2101960.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5936550347425433 + "value": 1.5934855191918345 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -19610,7 +19610,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07468131898586562 + "value": 0.07466932027948979 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -19640,13 +19640,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.60052762610508 + "value": 98.58872206402665 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96615907771245 + "value": 99.96150755674424 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -19706,7 +19706,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.37823538399237 + "value": 45.37353463878001 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -19718,13 +19718,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.999705865179635 + "value": 50.99388476962389 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0334413053735132 + "value": 1.0333233486032185 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -19736,7 +19736,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.1769513707578 + "value": 95.16611334939863 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -19755,19 +19755,19 @@ "time" ], "times": { - "compilation_time": 23768.22, - "data": 77864.859, - "framework": 1877416.517, - "kernel_overhead": 853598.773, - "profiling_overhead": 63835.132, - "profiling_runs": 882117.753, + "compilation_time": 14384.591, + "data": 57535.706, + "framework": 1835294.1860000002, + "kernel_overhead": 851160.555, + "profiling_overhead": 47265.868, + "profiling_runs": 879332.057, "runtimes": [ - 3253.888 + 3303.712 ], - "search_algorithm": 46.577, - "validation": 27.652 + "search_algorithm": 24.752, + "validation": 14.596 }, - "timestamp": "2026-03-05 08:58:58 UTC" + "timestamp": "2026-03-13 09:40:24 UTC" }, { "compilation_data": { @@ -19788,14 +19788,14 @@ "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -19803,49 +19803,49 @@ { "name": "time", "unit": "", - "value": 5451.936 + "value": 5577.856 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.573961335351227 + "value": 8.782946639617906 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 680.0 + "value": 9884.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839628.0 + "value": 1835496.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1631086606762437 + "value": 1.156534322130982 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 91197.0 + "value": 98129.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103107.0 + "value": 2100808.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4767430909031349 + "value": 0.47671759322173357 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -19857,7 +19857,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.022331221814056303 + "value": 0.02234203754000494 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -19887,13 +19887,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.86509127756061 + "value": 73.86752547447288 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92342201876644 + "value": 99.97930025131156 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -19953,7 +19953,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.9925449099325 + "value": 42.98940273418644 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -19965,13 +19965,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.5129277008401 + "value": 30.510644214629064 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3687475393534143 + "value": 0.3687199435117526 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -19983,7 +19983,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.94569539701739 + "value": 96.93844030301825 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20002,19 +20002,19 @@ "time" ], "times": { - "compilation_time": 23034.957, - "data": 79816.431, - "framework": 3117125.7630000003, - "kernel_overhead": 1466114.806, - "profiling_overhead": 65633.463, - "profiling_runs": 1505561.063, + "compilation_time": 14983.356, + "data": 59170.929, + "framework": 3073695.7309999997, + "kernel_overhead": 1463222.439, + "profiling_overhead": 49105.928, + "profiling_runs": 1502196.435, "runtimes": [ - 5451.936 + 5577.856 ], - "search_algorithm": 47.707, - "validation": 29.484 + "search_algorithm": 37.82, + "validation": 18.067 }, - "timestamp": "2026-03-05 08:59:0 UTC" + "timestamp": "2026-03-13 09:40:25 UTC" }, { "compilation_data": { @@ -20035,14 +20035,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -20050,49 +20050,49 @@ { "name": "time", "unit": "", - "value": 3232.704 + "value": 3281.44 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.305513620504563 + "value": 14.768392388047449 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5568.0 + "value": 5056.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1835900.0 + "value": 1837132.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8989198248388937 + "value": 1.8995463187179429 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 59126.0 + "value": 58984.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100258.0 + "value": 2100639.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5877180670560347 + "value": 1.587539717221002 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -20104,7 +20104,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07440149122676361 + "value": 0.07439086576062935 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -20134,13 +20134,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.6901638432784 + "value": 98.68425923730632 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96628493216011 + "value": 99.96188077307899 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -20200,7 +20200,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.0322090082976 + "value": 42.028416342916906 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -20212,13 +20212,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.80854814728028 + "value": 50.80353027224467 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0295677481016268 + "value": 1.0294660675283955 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -20230,7 +20230,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.26302357809035 + "value": 89.25423271457413 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20249,19 +20249,19 @@ "time" ], "times": { - "compilation_time": 23607.744, - "data": 77471.678, - "framework": 1704561.339, - "kernel_overhead": 767081.195, - "profiling_overhead": 64497.04, - "profiling_runs": 795511.426, + "compilation_time": 15309.066, + "data": 57230.757, + "framework": 1662959.583, + "kernel_overhead": 765418.246, + "profiling_overhead": 46997.44, + "profiling_runs": 793313.14, "runtimes": [ - 3232.704 + 3281.44 ], - "search_algorithm": 47.313, - "validation": 26.932 + "search_algorithm": 25.39, + "validation": 15.594 }, - "timestamp": "2026-03-05 08:59:1 UTC" + "timestamp": "2026-03-13 09:40:26 UTC" }, { "compilation_data": { @@ -20282,14 +20282,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -20297,49 +20297,49 @@ { "name": "time", "unit": "", - "value": 6175.968 + "value": 6350.656 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.190654073865238 + "value": 7.770033259722073 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 13804.0 + "value": 8304.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836884.0 + "value": 1838204.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0295931641592255 + "value": 1.0284505501879089 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 113312.0 + "value": 112179.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102120.0 + "value": 2108940.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.41933893379176135 + "value": 0.4193071874215905 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -20351,7 +20351,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019652898591427114 + "value": 0.019652238541583836 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -20381,13 +20381,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.97702198863051 + "value": 81.97197462420063 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9771024059319 + "value": 99.97820289586483 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -20447,7 +20447,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.84660159485612 + "value": 42.84488642392211 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -20459,13 +20459,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.838903006557924 + "value": 26.837706198209982 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.32434709443960386 + "value": 0.3243326310574693 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -20477,7 +20477,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.6548829783424 + "value": 93.65070669193656 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20496,19 +20496,19 @@ "time" ], "times": { - "compilation_time": 23757.858, - "data": 78038.998, - "framework": 3595039.245, - "kernel_overhead": 1704502.447, - "profiling_overhead": 65056.652, - "profiling_runs": 1747441.148, + "compilation_time": 14909.53, + "data": 57943.266, + "framework": 3557017.2530000005, + "kernel_overhead": 1704352.269, + "profiling_overhead": 47902.81, + "profiling_runs": 1746818.908, "runtimes": [ - 6175.968 + 6350.656 ], - "search_algorithm": 43.842, - "validation": 26.122 + "search_algorithm": 24.478, + "validation": 14.822 }, - "timestamp": "2026-03-05 08:59:3 UTC" + "timestamp": "2026-03-13 09:40:28 UTC" }, { "compilation_data": { @@ -20529,14 +20529,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 16, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -20544,49 +20544,49 @@ { "name": "time", "unit": "", - "value": 5800.224 + "value": 5890.464 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.714771490750815 + "value": 8.17588452067444 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5772.0 + "value": 784.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1834412.0 + "value": 1835156.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0775054867102731 + "value": 1.0730006440710291 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 99952.0 + "value": 96089.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100454.0 + "value": 2099897.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.44089897380706666 + "value": 0.4408700479849637 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -20598,7 +20598,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02066255018714506 + "value": 0.020661458571186158 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -20628,13 +20628,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.97973706333657 + "value": 81.97614854706899 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97153833762317 + "value": 99.97239316892632 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -20694,7 +20694,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.947888078140636 + "value": 43.945559484850335 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -20706,13 +20706,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.219300204131258 + "value": 28.217568078878802 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.34102914065051204 + "value": 0.3410082079844972 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -20724,7 +20724,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.5034632208357 + "value": 94.49766199026737 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20743,19 +20743,19 @@ "time" ], "times": { - "compilation_time": 24311.589, - "data": 78860.412, - "framework": 3491183.562, - "kernel_overhead": 1652790.248, - "profiling_overhead": 65500.38, - "profiling_runs": 1694032.522, + "compilation_time": 14889.316, + "data": 58551.531, + "framework": 3468168.438, + "kernel_overhead": 1660172.303, + "profiling_overhead": 48494.321, + "profiling_runs": 1700950.283, "runtimes": [ - 5800.224 + 5890.464 ], - "search_algorithm": 36.929, - "validation": 32.185 + "search_algorithm": 37.604, + "validation": 17.681 }, - "timestamp": "2026-03-05 08:59:5 UTC" + "timestamp": "2026-03-13 09:40:30 UTC" }, { "compilation_data": { @@ -20776,14 +20776,14 @@ "registers": 44 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 1 }, "correctness": 1, "invalidity": "correct", @@ -20791,49 +20791,49 @@ { "name": "time", "unit": "", - "value": 3652.096 + "value": 3676.544 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.093173019654557 + "value": 13.040931456482166 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 188.0 + "value": 10800.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836576.0 + "value": 1841488.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.6843900360062083 + "value": 1.6927700555485554 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 59606.0 + "value": 71790.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098980.0 + "value": 2105194.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 22.551583748153735 + "value": 22.54942837054676 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -20845,7 +20845,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0567276250792648 + "value": 1.0566778733024913 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -20875,13 +20875,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 72.73519530002119 + "value": 72.35160803913173 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95301640059463 + "value": 99.95950820364654 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -20941,7 +20941,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.08644467134761 + "value": 40.08077323559749 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -20953,13 +20953,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.108238810937046 + "value": 45.103185684335855 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.981875934155154 + "value": 11.980533697401711 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -20971,7 +20971,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 57.355090308352175 + "value": 57.34866727810712 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -20990,19 +20990,19 @@ "time" ], "times": { - "compilation_time": 23542.513, - "data": 75305.716, - "framework": 280767.43, - "kernel_overhead": 56108.562, - "profiling_overhead": 62703.69, - "profiling_runs": 86649.462, + "compilation_time": 15031.125, + "data": 57539.871, + "framework": 242777.946, + "kernel_overhead": 53876.744, + "profiling_overhead": 47201.804, + "profiling_runs": 84159.527, "runtimes": [ - 3652.096 + 3676.544 ], - "search_algorithm": 34.442, - "validation": 30.438 + "search_algorithm": 23.131, + "validation": 14.219 }, - "timestamp": "2026-03-05 08:59:5 UTC" + "timestamp": "2026-03-13 09:40:30 UTC" }, { "compilation_data": { @@ -21023,14 +21023,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -21038,49 +21038,49 @@ { "name": "time", "unit": "", - "value": 2145.696 + "value": 2117.824 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.848731884057973 + "value": 22.541269710906704 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6644.0 + "value": 5360.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838724.0 + "value": 1839028.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.9231616774994507 + "value": 2.895358781142724 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 40668.0 + "value": 40711.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100390.0 + "value": 2100383.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.82712463689837 + "value": 19.82883176662523 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -21092,7 +21092,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9290833892603654 + "value": 0.9288732224683685 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -21122,13 +21122,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.14398704992769 + "value": 90.47631412796376 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91212727952865 + "value": 99.90820604366661 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -21188,7 +21188,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.290458375920636 + "value": 50.279944035428194 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -21200,13 +21200,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.35151088156461 + "value": 79.33667458973002 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.693855958648355 + "value": 10.691856536506586 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -21218,7 +21218,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.9429847673884 + "value": 75.92878576509746 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -21237,19 +21237,19 @@ "time" ], "times": { - "compilation_time": 24249.836, - "data": 78457.512, - "framework": 295900.991, - "kernel_overhead": 64400.751, - "profiling_overhead": 65746.26, - "profiling_runs": 87296.468, + "compilation_time": 14753.418, + "data": 57794.656, + "framework": 253746.623, + "kernel_overhead": 62801.203, + "profiling_overhead": 48161.41, + "profiling_runs": 84989.354, "runtimes": [ - 2145.696 + 2117.824 ], - "search_algorithm": 41.357, - "validation": 24.348 + "search_algorithm": 36.239, + "validation": 16.702 }, - "timestamp": "2026-03-05 08:59:5 UTC" + "timestamp": "2026-03-13 09:40:30 UTC" }, { "compilation_data": { @@ -21270,14 +21270,14 @@ "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -21285,49 +21285,49 @@ { "name": "time", "unit": "", - "value": 1840.8 + "value": 1915.424 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 34.38343948664021 + "value": 25.787899533129828 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 508.0 + "value": 660.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840992.0 + "value": 1837584.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3561161240681017 + "value": 3.3015241077249216 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 30478.0 + "value": 30642.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099167.0 + "value": 2099355.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.3842722234391 + "value": 11.380418205211294 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -21339,7 +21339,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5331299965345249 + "value": 0.5330977892580449 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -21369,13 +21369,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.83346418710724 + "value": 94.30376504681475 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.8585236045497 + "value": 99.88919487288169 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -21435,7 +21435,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 46.22532471978186 + "value": 46.20739680292629 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -21447,13 +21447,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.11642764605563 + "value": 91.08294727555143 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.317642932490186 + "value": 6.3153215396134295 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -21465,7 +21465,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 67.00403306654975 + "value": 66.9794126999452 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -21484,19 +21484,19 @@ "time" ], "times": { - "compilation_time": 23738.494, - "data": 78891.017, - "framework": 240149.04000000004, - "kernel_overhead": 37616.868, - "profiling_overhead": 64694.833, - "profiling_runs": 58946.322, + "compilation_time": 14997.234, + "data": 57158.142, + "framework": 195996.523, + "kernel_overhead": 35286.289, + "profiling_overhead": 47364.183, + "profiling_runs": 56187.909, "runtimes": [ - 1840.8 + 1915.424 ], - "search_algorithm": 36.819, - "validation": 27.039 + "search_algorithm": 22.391, + "validation": 13.607 }, - "timestamp": "2026-03-05 08:59:5 UTC" + "timestamp": "2026-03-13 09:40:30 UTC" }, { "compilation_data": { @@ -21517,14 +21517,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -21532,49 +21532,49 @@ { "name": "time", "unit": "", - "value": 1802.912 + "value": 1801.76 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.418826520537124 + "value": 26.551443353112447 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 172.0 + "value": 5428.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836484.0 + "value": 1838848.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4628360785498176 + "value": 3.4691322084737455 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 28648.0 + "value": 34716.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098982.0 + "value": 2100488.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.876784090846199 + "value": 5.875582395554313 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -21586,7 +21586,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2753039085851221 + "value": 0.27522587481590294 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -21616,13 +21616,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.31117234185692 + "value": 97.17111416997169 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90957768810586 + "value": 99.898441445012 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -21682,7 +21682,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.21470186518152 + "value": 43.20921280112353 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -21694,13 +21694,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.05544790751537 + "value": 94.0392701944256 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.4444133755193618 + "value": 3.4438209299716402 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -21712,7 +21712,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 60.8984758456798 + "value": 60.888001193846705 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -21731,19 +21731,19 @@ "time" ], "times": { - "compilation_time": 23311.967, - "data": 77615.47, - "framework": 227978.95, - "kernel_overhead": 32220.56, - "profiling_overhead": 64676.633, - "profiling_runs": 53466.287, + "compilation_time": 15896.335, + "data": 57649.619, + "framework": 185313.43899999998, + "kernel_overhead": 29796.376, + "profiling_overhead": 47287.241, + "profiling_runs": 50580.203, "runtimes": [ - 1802.912 + 1801.76 ], - "search_algorithm": 36.194, - "validation": 27.27 + "search_algorithm": 27.185, + "validation": 17.711 }, - "timestamp": "2026-03-05 08:59:5 UTC" + "timestamp": "2026-03-13 09:40:30 UTC" }, { "compilation_data": { @@ -21764,14 +21764,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -21779,49 +21779,49 @@ { "name": "time", "unit": "", - "value": 1756.224 + "value": 1778.56 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.20256978094085 + "value": 27.51541641092527 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 868.0 + "value": 524.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1835280.0 + "value": 1835692.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5432683322657783 + "value": 3.535438803144819 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 29178.0 + "value": 29258.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099900.0 + "value": 2099303.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.0041930022446444 + "value": 3.003679935882598 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -21833,7 +21833,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1407434870916987 + "value": 0.14069889611823105 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -21863,13 +21863,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.08951524677886 + "value": 98.02594052159382 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.92421242977572 + "value": 99.91443336176012 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -21929,7 +21929,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.648535653279666 + "value": 39.64005106078096 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -21941,13 +21941,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.1537597861542 + "value": 96.1327039397227 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.9484282378541988 + "value": 1.9480015690910606 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -21959,7 +21959,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.524564640668764 + "value": 58.51174887300367 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -21978,19 +21978,19 @@ "time" ], "times": { - "compilation_time": 23355.022, - "data": 77540.835, - "framework": 234802.39300000004, - "kernel_overhead": 35888.629, - "profiling_overhead": 64291.736, - "profiling_runs": 57081.193, + "compilation_time": 15772.63, + "data": 57124.496, + "framework": 191622.517, + "kernel_overhead": 33372.821, + "profiling_overhead": 47253.896, + "profiling_runs": 53871.304, "runtimes": [ - 1756.224 + 1778.56 ], - "search_algorithm": 33.882, - "validation": 26.702 + "search_algorithm": 42.291, + "validation": 14.398 }, - "timestamp": "2026-03-05 08:59:6 UTC" + "timestamp": "2026-03-13 09:40:30 UTC" }, { "compilation_data": { @@ -22011,14 +22011,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -22026,49 +22026,49 @@ { "name": "time", "unit": "", - "value": 1752.448 + "value": 1810.592 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.02496919324011 + "value": 27.29094279258753 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2952.0 + "value": 1812.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1830644.0 + "value": 1838008.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.541810176239485 + "value": 3.5257109843983683 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 32115.0 + "value": 29941.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103028.0 + "value": 2099375.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4975219985175534 + "value": 1.4974026481070932 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -22080,7 +22080,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07013580135860718 + "value": 0.07008449493478686 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -22110,13 +22110,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.50095599888039 + "value": 81.5524370488381 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.88440620523762 + "value": 99.8299315290226 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -22176,7 +22176,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.03346090230104 + "value": 38.01990359255715 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -22188,13 +22188,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.86956672515153 + "value": 95.85171062295778 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.158579968968506 + "value": 1.1583641786709988 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -22206,7 +22206,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 53.64109199470638 + "value": 53.631101120092474 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -22225,19 +22225,19 @@ "time" ], "times": { - "compilation_time": 23522.335, - "data": 78010.587, - "framework": 214941.74300000002, - "kernel_overhead": 25393.347, - "profiling_overhead": 64737.256, - "profiling_runs": 46800.553, + "compilation_time": 15786.739, + "data": 56888.519, + "framework": 171378.13999999998, + "kernel_overhead": 23334.802, + "profiling_overhead": 47257.214, + "profiling_runs": 43897.605, "runtimes": [ - 1752.448 + 1810.592 ], - "search_algorithm": 36.308, - "validation": 27.219 + "search_algorithm": 35.802, + "validation": 15.892 }, - "timestamp": "2026-03-05 08:59:6 UTC" + "timestamp": "2026-03-13 09:40:31 UTC" }, { "compilation_data": { @@ -22258,14 +22258,14 @@ "registers": 19 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -22273,49 +22273,49 @@ { "name": "time", "unit": "", - "value": 5212.32 + "value": 5348.8 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 12.077077097998561 + "value": 9.264298551898865 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10092.0 + "value": 6212.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838980.0 + "value": 1837920.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.2097525102507767 + "value": 1.207515679406562 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 93613.0 + "value": 90079.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100774.0 + "value": 2100718.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.958858416767719 + "value": 7.958273511860672 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -22327,7 +22327,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.37298925519732545 + "value": 0.3729312405369703 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -22357,13 +22357,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.64592044196938 + "value": 96.5919553059146 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96989955956049 + "value": 99.95806105022952 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -22423,7 +22423,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.28375207420477 + "value": 36.28260351733506 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -22435,13 +22435,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 31.837999821678565 + "value": 31.83681788621662 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.2906679447184 + "value": 4.290508660447161 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -22453,7 +22453,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.85002557366224 + "value": 84.84687565046562 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -22472,19 +22472,19 @@ "time" ], "times": { - "compilation_time": 23618.779, - "data": 78619.611, - "framework": 2143222.122, - "kernel_overhead": 980486.144, - "profiling_overhead": 65341.179, - "profiling_runs": 1018775.188, + "compilation_time": 15855.153, + "data": 57489.225, + "framework": 2111278.6070000003, + "kernel_overhead": 983928.701, + "profiling_overhead": 47818.063, + "profiling_runs": 1022042.618, "runtimes": [ - 5212.32 + 5348.8 ], - "search_algorithm": 44.546, - "validation": 27.933 + "search_algorithm": 23.372, + "validation": 14.232 }, - "timestamp": "2026-03-05 08:59:7 UTC" + "timestamp": "2026-03-13 09:40:32 UTC" }, { "compilation_data": { @@ -22505,14 +22505,14 @@ "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -22520,49 +22520,49 @@ { "name": "time", "unit": "", - "value": 5763.552 + "value": 5766.336 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.988757674333364 + "value": 8.377835586869757 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2324.0 + "value": 4868.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839652.0 + "value": 1838288.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1071385220582493 + "value": 1.0960094059460839 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 97932.0 + "value": 97312.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103766.0 + "value": 2100424.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.621197714728821 + "value": 3.6210682863864716 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -22574,7 +22574,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1696422103173402 + "value": 0.16969768112289374 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -22604,13 +22604,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.52189164443634 + "value": 98.59914054366196 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93498679381365 + "value": 99.97134774266385 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -22670,7 +22670,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 33.79543895989285 + "value": 33.79410762566248 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -22682,13 +22682,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.971105605447793 + "value": 28.9700381481878 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.0087387675652284 + "value": 2.0086647544153653 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -22700,7 +22700,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 84.93327551332058 + "value": 84.93014609731566 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -22719,19 +22719,19 @@ "time" ], "times": { - "compilation_time": 23314.469, - "data": 84564.626, - "framework": 2741347.61, - "kernel_overhead": 1272550.615, - "profiling_overhead": 70901.178, - "profiling_runs": 1313331.191, + "compilation_time": 14094.86, + "data": 57985.872, + "framework": 2699925.7520000003, + "kernel_overhead": 1277035.939, + "profiling_overhead": 47752.929, + "profiling_runs": 1317151.012, "runtimes": [ - 5763.552 + 5766.336 ], - "search_algorithm": 43.733, - "validation": 32.403 + "search_algorithm": 35.035, + "validation": 15.817 }, - "timestamp": "2026-03-05 08:59:8 UTC" + "timestamp": "2026-03-13 09:40:33 UTC" }, { "compilation_data": { @@ -22752,14 +22752,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -22767,49 +22767,49 @@ { "name": "time", "unit": "", - "value": 7988.736 + "value": 8045.503 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.981624849607649 + "value": 6.12388964583231 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7120.0 + "value": 6252.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1911964.0 + "value": 1909528.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.16923766223257 + "value": 49.135414583517814 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 701509.0 + "value": 708460.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415583.0 + "value": 138415765.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2788183418797983 + "value": 1.2786563572445209 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -22821,7 +22821,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.060075637221961346 + "value": 0.06002141633300066 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -22851,13 +22851,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.40039340703119 + "value": 96.4582943423236 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.10358630543877 + "value": 100.22835303966491 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -22917,7 +22917,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.711895970282031 + "value": 10.689245919838248 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -22929,13 +22929,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.48459826656783 + "value": 20.440633300861617 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.092489979308457 + "value": 11.068682778640396 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -22947,7 +22947,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.162099901977125 + "value": 30.097364647198805 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -22966,19 +22966,19 @@ "time" ], "times": { - "compilation_time": 23038.686, - "data": 77706.384, - "framework": 945479.5329999999, - "kernel_overhead": 375227.556, - "profiling_overhead": 64479.427, - "profiling_runs": 428066.166, + "compilation_time": 16156.747, + "data": 59548.319, + "framework": 914479.264, + "kernel_overhead": 376537.66, + "profiling_overhead": 49060.053, + "profiling_runs": 429333.232, "runtimes": [ - 7988.736 + 8045.503 ], - "search_algorithm": 48.681, - "validation": 31.207 + "search_algorithm": 23.061, + "validation": 16.064 }, - "timestamp": "2026-03-05 08:59:9 UTC" + "timestamp": "2026-03-13 09:40:34 UTC" }, { "compilation_data": { @@ -22999,14 +22999,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -23014,49 +23014,49 @@ { "name": "time", "unit": "", - "value": 8049.568 + "value": 8259.104 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.299550297587416 + "value": 6.445551580227238 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 856.0 + "value": 8664.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1998260.0 + "value": 1999840.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 51.79046508667577 + "value": 51.717233920607455 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 8378613.0 + "value": 8481421.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138414780.0 + "value": 138419231.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6468986545470891 + "value": 0.6435627625105395 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -23068,7 +23068,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02998399048164469 + "value": 0.02995279217647109 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -23098,13 +23098,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.22455523897554 + "value": 85.2278319074976 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90366075291342 + "value": 99.08454204233192 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -23164,7 +23164,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.259012466563266 + "value": 9.326136681167359 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -23176,13 +23176,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.488809600375443 + "value": 20.636693041115358 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.759626330665913 + "value": 10.837286799667755 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -23194,7 +23194,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.014880678049842 + "value": 28.217085541455482 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -23213,19 +23213,19 @@ "time" ], "times": { - "compilation_time": 24411.875, - "data": 78500.195, - "framework": 918128.9310000001, - "kernel_overhead": 360829.543, - "profiling_overhead": 65240.154, - "profiling_runs": 413559.039, + "compilation_time": 14791.697, + "data": 59562.649, + "framework": 880938.26, + "kernel_overhead": 359564.884, + "profiling_overhead": 49416.463, + "profiling_runs": 412394.264, "runtimes": [ - 8049.568 + 8259.104 ], - "search_algorithm": 55.91, - "validation": 31.469 + "search_algorithm": 24.176, + "validation": 16.915 }, - "timestamp": "2026-03-05 08:59:9 UTC" + "timestamp": "2026-03-13 09:40:34 UTC" }, { "compilation_data": { @@ -23246,14 +23246,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -23261,49 +23261,49 @@ { "name": "time", "unit": "", - "value": 8524.96 + "value": 8780.448 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.594076310712438 + "value": 6.545309701688259 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6720.0 + "value": 33484.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2205596.0 + "value": 2202620.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 61.277786915256804 + "value": 61.33171293707412 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 47211445.0 + "value": 47415751.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415967.0 + "value": 138422062.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.30079414426282525 + "value": 0.3000275184121926 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -23315,7 +23315,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.014027945887472722 + "value": 0.013987374527684221 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -23345,13 +23345,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.82181887423913 + "value": 76.12008888082325 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.30158362985749 + "value": 98.81929734979549 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -23411,7 +23411,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.03136331249332 + "value": 8.045589900817038 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -23423,13 +23423,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.287529380955394 + "value": 19.325606638210306 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.971031119182875 + "value": 9.99071583408455 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -23441,7 +23441,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.358748173548225 + "value": 25.408811173261398 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -23460,19 +23460,19 @@ "time" ], "times": { - "compilation_time": 23578.843, - "data": 78144.283, - "framework": 898934.0789999999, - "kernel_overhead": 350134.852, - "profiling_overhead": 65090.84, - "profiling_runs": 405564.104, + "compilation_time": 14726.061, + "data": 59294.812, + "framework": 865387.493, + "kernel_overhead": 350523.9, + "profiling_overhead": 48945.84, + "profiling_runs": 406622.941, "runtimes": [ - 8524.96 + 8780.448 ], - "search_algorithm": 46.635, - "validation": 28.326 + "search_algorithm": 22.094, + "validation": 17.816 }, - "timestamp": "2026-03-05 08:59:10 UTC" + "timestamp": "2026-03-13 09:40:35 UTC" }, { "compilation_data": { @@ -23493,14 +23493,14 @@ "registers": 23 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -23508,49 +23508,49 @@ { "name": "time", "unit": "", - "value": 3445.024 + "value": 3473.44 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.378487602464233 + "value": 14.020032659696668 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9220.0 + "value": 5944.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838956.0 + "value": 1837540.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8180915528614652 + "value": 1.8047616145495888 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 65594.0 + "value": 61875.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100634.0 + "value": 2100790.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.060092967716212 + "value": 6.059821416974538 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -23562,7 +23562,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2839351357946128 + "value": 0.2839350045785906 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -23592,13 +23592,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.71803381668222 + "value": 97.61281333963794 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93649791294504 + "value": 99.94483386116715 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -23658,7 +23658,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.385593063683345 + "value": 41.38294054596213 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -23670,13 +23670,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.48905473736537 + "value": 48.48498807724858 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.362034068704044 + "value": 3.3617521030123525 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -23688,7 +23688,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.6029746244573 + "value": 87.59562757418074 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -23707,19 +23707,19 @@ "time" ], "times": { - "compilation_time": 22903.166, - "data": 77845.534, - "framework": 1395668.1, - "kernel_overhead": 611881.111, - "profiling_overhead": 64611.143, - "profiling_runs": 641330.312, + "compilation_time": 14586.296, + "data": 57243.152, + "framework": 1360315.105, + "kernel_overhead": 613556.265, + "profiling_overhead": 47030.135, + "profiling_runs": 642485.553, "runtimes": [ - 3445.024 + 3473.44 ], - "search_algorithm": 45.225, - "validation": 29.896 + "search_algorithm": 23.974, + "validation": 16.252 }, - "timestamp": "2026-03-05 08:59:11 UTC" + "timestamp": "2026-03-13 09:40:35 UTC" }, { "compilation_data": { @@ -23740,14 +23740,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -23755,49 +23755,49 @@ { "name": "time", "unit": "", - "value": 4075.296 + "value": 4127.711 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.402028069913992 + "value": 11.698888142903646 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8840.0 + "value": 924.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1843228.0 + "value": 1839152.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5315372854454459 + "value": 1.5159965348459281 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 76959.0 + "value": 69672.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109265.0 + "value": 2102900.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.5332294820762513 + "value": 2.5329366295646727 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -23809,7 +23809,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1186618271671367 + "value": 0.11868474928498668 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -23839,13 +23839,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.12642663888748 + "value": 98.05694193438794 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9106335054918 + "value": 99.942124372741 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -23905,7 +23905,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.866962747809325 + "value": 40.85957669486962 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -23917,13 +23917,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 40.539465705776266 + "value": 40.534520697385496 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.4845995741861426 + "value": 1.4844184825702695 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -23935,7 +23935,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.04646255708984 + "value": 90.03547867989383 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -23954,19 +23954,19 @@ "time" ], "times": { - "compilation_time": 23562.709, - "data": 78056.506, - "framework": 1978049.5779999997, - "kernel_overhead": 901502.668, - "profiling_overhead": 64437.894, - "profiling_runs": 934052.51, + "compilation_time": 14585.207, + "data": 57362.995, + "framework": 1951134.7340000002, + "kernel_overhead": 907221.305, + "profiling_overhead": 47301.246, + "profiling_runs": 939249.188, "runtimes": [ - 4075.296 + 4127.711 ], - "search_algorithm": 57.267, - "validation": 25.757 + "search_algorithm": 24.154, + "validation": 17.024 }, - "timestamp": "2026-03-05 08:59:12 UTC" + "timestamp": "2026-03-13 09:40:36 UTC" }, { "compilation_data": { @@ -23987,14 +23987,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -24002,49 +24002,49 @@ { "name": "time", "unit": "", - "value": 8054.656 + "value": 8125.76 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.337191522746194 + "value": 6.382699748579639 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1828.0 + "value": 4548.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1997732.0 + "value": 1997132.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 53.21900155539085 + "value": 53.310037761541466 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 12835159.0 + "value": 12879676.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138414846.0 + "value": 138419285.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.6426133288500948 + "value": 0.6405844590323392 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -24056,7 +24056,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029762784936339538 + "value": 0.029936737446699852 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -24086,13 +24086,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.91331211123594 + "value": 86.71388132456342 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.24235457259728 + "value": 99.6202526347094 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -24152,7 +24152,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.092342218022935 + "value": 9.11038583367539 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -24164,13 +24164,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.47317525940482 + "value": 20.514716860387914 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.633122196618465 + "value": 5.644552222084273 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -24182,7 +24182,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.3160555466965 + "value": 20.35727834005307 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -24201,19 +24201,19 @@ "time" ], "times": { - "compilation_time": 23052.975, - "data": 79186.412, - "framework": 591723.954, - "kernel_overhead": 196805.399, - "profiling_overhead": 66079.039, - "profiling_runs": 249653.104, + "compilation_time": 16380.58, + "data": 58981.219, + "framework": 552243.067, + "kernel_overhead": 196152.587, + "profiling_overhead": 48208.755, + "profiling_runs": 248900.506, "runtimes": [ - 8054.656 + 8125.76 ], - "search_algorithm": 48.377, - "validation": 36.974 + "search_algorithm": 26.987, + "validation": 14.017 }, - "timestamp": "2026-03-05 08:59:12 UTC" + "timestamp": "2026-03-13 09:40:37 UTC" }, { "compilation_data": { @@ -24234,14 +24234,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -24249,49 +24249,49 @@ { "name": "time", "unit": "", - "value": 9240.864 + "value": 9822.048 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.919286503248944 + "value": 6.167041910783482 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2196.0 + "value": 18080.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2191160.0 + "value": 2194256.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 79.1084683769445 + "value": 79.44415870359379 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 118713505.0 + "value": 119613413.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138416013.0 + "value": 138421443.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.28142749271095907 + "value": 0.2792457943331886 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -24303,7 +24303,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.013038542304552167 + "value": 0.013009034588194182 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -24333,13 +24333,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.34121754177049 + "value": 94.69213935633907 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.21017959626442 + "value": 98.52317582855736 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -24399,7 +24399,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.400835631716535 + "value": 7.436394387936179 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -24411,13 +24411,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.943679266509655 + "value": 18.027909076596675 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.790384101056715 + "value": 4.8128707459127105 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -24429,7 +24429,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.86303156629034 + "value": 16.942188684806776 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -24448,19 +24448,19 @@ "time" ], "times": { - "compilation_time": 23060.91, - "data": 77544.198, - "framework": 574807.198, - "kernel_overhead": 186936.679, - "profiling_overhead": 64102.402, - "profiling_runs": 246223.919, + "compilation_time": 14446.466, + "data": 59320.634, + "framework": 536863.9010000001, + "kernel_overhead": 184566.688, + "profiling_overhead": 49107.514, + "profiling_runs": 243869.065, "runtimes": [ - 9240.864 + 9822.048 ], - "search_algorithm": 44.824, - "validation": 30.024 + "search_algorithm": 30.978, + "validation": 14.178 }, - "timestamp": "2026-03-05 08:59:13 UTC" + "timestamp": "2026-03-13 09:40:37 UTC" }, { "compilation_data": { @@ -24481,14 +24481,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -24496,49 +24496,49 @@ { "name": "time", "unit": "", - "value": 2551.072 + "value": 2569.664 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 24.67507710760795 + "value": 18.792130139140003 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6116.0 + "value": 252.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837132.0 + "value": 1835688.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.426071821444852 + "value": 2.4201583944003566 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 46952.0 + "value": 41340.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100326.0 + "value": 2099098.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 4.078931781659618 + "value": 4.07841962173134 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -24550,7 +24550,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1911141263014284 + "value": 0.19107541888908336 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -24580,13 +24580,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.30925077305018 + "value": 98.31426708659741 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94343067353732 + "value": 99.93919243073574 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -24646,7 +24646,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.383314376981495 + "value": 45.37643026506167 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -24658,13 +24658,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.2705448851743 + "value": 65.26009272355859 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.3902787433535506 + "value": 2.3898959737631316 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -24676,7 +24676,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.96287300709014 + "value": 91.94814644378341 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -24695,19 +24695,19 @@ "time" ], "times": { - "compilation_time": 23930.093, - "data": 76904.437, - "framework": 1115730.438, - "kernel_overhead": 475397.66, - "profiling_overhead": 63097.784, - "profiling_runs": 500330.557, + "compilation_time": 15023.824, + "data": 57341.062, + "framework": 1083610.326, + "kernel_overhead": 477397.596, + "profiling_overhead": 47137.737, + "profiling_runs": 501733.931, "runtimes": [ - 2551.072 + 2569.664 ], - "search_algorithm": 46.746, - "validation": 27.78 + "search_algorithm": 24.901, + "validation": 15.466 }, - "timestamp": "2026-03-05 08:59:13 UTC" + "timestamp": "2026-03-13 09:40:38 UTC" }, { "compilation_data": { @@ -24728,14 +24728,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -24743,49 +24743,49 @@ { "name": "time", "unit": "", - "value": 3285.824 + "value": 3306.56 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.242696392221983 + "value": 14.713832292413228 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5100.0 + "value": 2420.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1834120.0 + "value": 1837068.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9051201904603687 + "value": 1.9098400446895907 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 57355.0 + "value": 57096.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100134.0 + "value": 2103671.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5936535451639076 + "value": 1.59355358021411 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -24797,7 +24797,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0746825308699038 + "value": 0.07467034119934253 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -24827,13 +24827,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.32449121415682 + "value": 98.3678901921875 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96281124634557 + "value": 99.95966484453128 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -24893,7 +24893,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.38005377589994 + "value": 45.375150850533416 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -24905,13 +24905,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 51.002241505139246 + "value": 50.99552204851818 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0334926867496477 + "value": 1.0333565258855002 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -24923,7 +24923,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.18175146603859 + "value": 95.16921143581212 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -24942,19 +24942,19 @@ "time" ], "times": { - "compilation_time": 23717.074, - "data": 76407.324, - "framework": 1868483.835, - "kernel_overhead": 850240.442, - "profiling_overhead": 63186.081, - "profiling_runs": 878649.988, + "compilation_time": 14574.928, + "data": 57463.432, + "framework": 1845392.958, + "kernel_overhead": 856214.361, + "profiling_overhead": 47397.522, + "profiling_runs": 884317.643, "runtimes": [ - 3285.824 + 3306.56 ], - "search_algorithm": 44.552, - "validation": 26.84 + "search_algorithm": 25.305, + "validation": 15.046 }, - "timestamp": "2026-03-05 08:59:14 UTC" + "timestamp": "2026-03-13 09:40:39 UTC" }, { "compilation_data": { @@ -24975,14 +24975,14 @@ "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -24990,49 +24990,49 @@ { "name": "time", "unit": "", - "value": 5451.52 + "value": 5474.08 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.572388954133753 + "value": 8.777453107289547 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10112.0 + "value": 6788.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1831368.0 + "value": 1836876.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1681980496801165 + "value": 1.1645846960726136 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 100582.0 + "value": 100293.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108924.0 + "value": 2109537.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.47711521982472127 + "value": 0.4770953197024484 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -25044,7 +25044,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.022359654681627303 + "value": 0.022355722628357652 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -25074,13 +25074,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.73993089953713 + "value": 65.7579605099478 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96950885875762 + "value": 99.95633079388723 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -25140,7 +25140,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.02746772980107 + "value": 43.02563571652544 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -25152,13 +25152,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 30.53769314979895 + "value": 30.536348276118996 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3690468288366816 + "value": 0.36903057609079354 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -25170,7 +25170,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.02434561532576 + "value": 97.02007268324498 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -25189,19 +25189,19 @@ "time" ], "times": { - "compilation_time": 23602.028, - "data": 79279.707, - "framework": 3091964.3839999996, - "kernel_overhead": 1453865.355, - "profiling_overhead": 65996.476, - "profiling_runs": 1492822.846, + "compilation_time": 14485.702, + "data": 57765.688, + "framework": 3076438.659, + "kernel_overhead": 1466110.634, + "profiling_overhead": 47686.888, + "profiling_runs": 1504875.449, "runtimes": [ - 5451.52 + 5474.08 ], - "search_algorithm": 46.017, - "validation": 25.615 + "search_algorithm": 31.983, + "validation": 16.625 }, - "timestamp": "2026-03-05 08:59:16 UTC" + "timestamp": "2026-03-13 09:40:40 UTC" }, { "compilation_data": { @@ -25222,14 +25222,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -25237,49 +25237,49 @@ { "name": "time", "unit": "", - "value": 3250.912 + "value": 3240.512 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.325615979934547 + "value": 14.723971928787014 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7568.0 + "value": 8624.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836708.0 + "value": 1839708.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.9105284131095486 + "value": 1.9106651029471386 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 59956.0 + "value": 63056.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100317.0 + "value": 2106623.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.5962124904598585 + "value": 1.5960594117261604 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -25291,7 +25291,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0747984177164143 + "value": 0.07479020034132768 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -25321,13 +25321,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.53428792683535 + "value": 98.52535531818756 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95608948478998 + "value": 99.95761706054063 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -25387,7 +25387,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.2606766213056 + "value": 42.25551024595298 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -25399,13 +25399,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 51.08481810122784 + "value": 51.07842530441994 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0351659917973413 + "value": 1.0350364502604625 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -25417,7 +25417,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.74845614533342 + "value": 89.73722494856527 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -25436,19 +25436,19 @@ "time" ], "times": { - "compilation_time": 23873.486, - "data": 76100.779, - "framework": 1692229.267, - "kernel_overhead": 762357.011, - "profiling_overhead": 62902.878, - "profiling_runs": 790868.599, + "compilation_time": 14520.729, + "data": 58542.494, + "framework": 1671651.1439999999, + "kernel_overhead": 768327.674, + "profiling_overhead": 48488.431, + "profiling_runs": 796292.545, "runtimes": [ - 3250.912 + 3240.512 ], - "search_algorithm": 44.932, - "validation": 25.656 + "search_algorithm": 26.552, + "validation": 17.149 }, - "timestamp": "2026-03-05 08:59:17 UTC" + "timestamp": "2026-03-13 09:40:41 UTC" }, { "compilation_data": { @@ -25469,14 +25469,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -25484,49 +25484,49 @@ { "name": "time", "unit": "", - "value": 6145.376 + "value": 6692.736 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.123611052282321 + "value": 7.737679127307858 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6812.0 + "value": 12112.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1828772.0 + "value": 1833244.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0272348554438024 + "value": 1.0272868550267 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 106246.0 + "value": 114605.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100518.0 + "value": 2102181.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4200296378029494 + "value": 0.4200088916868048 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -25538,7 +25538,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019684418782933914 + "value": 0.0196833549202028 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -25568,13 +25568,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.96010490876205 + "value": 81.95622202719717 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9700862072777 + "value": 99.96903631908961 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -25634,7 +25634,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.91808833600284 + "value": 42.91620457340909 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -25646,13 +25646,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.88383508633597 + "value": 26.882664446823085 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.3248900968685622 + "value": 0.324875949735777 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -25664,7 +25664,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.81168858090346 + "value": 93.80760361054595 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -25683,19 +25683,19 @@ "time" ], "times": { - "compilation_time": 23605.187, - "data": 77194.739, - "framework": 3585690.7630000003, - "kernel_overhead": 1700806.226, - "profiling_overhead": 64119.629, - "profiling_runs": 1743570.169, + "compilation_time": 15040.364, + "data": 57880.419, + "framework": 3575988.309, + "kernel_overhead": 1713805.869, + "profiling_overhead": 47670.539, + "profiling_runs": 1756631.482, "runtimes": [ - 6145.376 + 6692.736 ], - "search_algorithm": 45.985, - "validation": 32.586 + "search_algorithm": 26.475, + "validation": 15.541 }, - "timestamp": "2026-03-05 08:59:19 UTC" + "timestamp": "2026-03-13 09:40:43 UTC" }, { "compilation_data": { @@ -25716,14 +25716,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "0", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 16, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 0, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -25731,49 +25731,49 @@ { "name": "time", "unit": "", - "value": 5834.656 + "value": 5878.688 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.709511611257774 + "value": 8.186485294621349 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2404.0 + "value": 4432.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1835780.0 + "value": 1831164.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0818129541174824 + "value": 1.077164795039242 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 101199.0 + "value": 100195.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103894.0 + "value": 2101108.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.4415511537539678 + "value": 0.44152929626775533 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -25785,7 +25785,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.020690157516283985 + "value": 0.02069226539689442 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -25815,13 +25815,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.83273272727429 + "value": 81.83386293428686 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95715059622778 + "value": 99.97391975530049 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -25881,7 +25881,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.01305576636728 + "value": 44.0100996023888 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -25893,13 +25893,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.261071429506924 + "value": 28.259209759615306 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.34153394427748845 + "value": 0.3415114460695698 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -25911,7 +25911,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.64336570877545 + "value": 94.63713159255578 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -25930,19 +25930,19 @@ "time" ], "times": { - "compilation_time": 24399.104, - "data": 78707.851, - "framework": 3488979.1040000003, - "kernel_overhead": 1651780.17, - "profiling_overhead": 65385.449, - "profiling_runs": 1693105.634, + "compilation_time": 15155.985, + "data": 58026.427, + "framework": 3482989.927, + "kernel_overhead": 1668238.879, + "profiling_overhead": 47743.108, + "profiling_runs": 1708981.513, "runtimes": [ - 5834.656 + 5878.688 ], - "search_algorithm": 46.221, - "validation": 32.433 + "search_algorithm": 32.325, + "validation": 15.167 }, - "timestamp": "2026-03-05 08:59:20 UTC" + "timestamp": "2026-03-13 09:40:45 UTC" }, { "compilation_data": { @@ -25963,14 +25963,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 1 }, "correctness": 1, "invalidity": "correct", @@ -25978,49 +25978,49 @@ { "name": "time", "unit": "", - "value": 6838.304 + "value": 6956.864 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.943831679351948 + "value": 7.091559289527958 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 56.0 + "value": 9992.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871068.0 + "value": 1873744.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9117730596858147 + "value": 0.9272932330576963 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 115017.0 + "value": 124361.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099184.0 + "value": 2105209.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.98036528139767 + "value": 47.985931610952925 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -26032,7 +26032,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5622650615557956 + "value": 0.5621981774882934 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -26062,13 +26062,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.02260925906698 + "value": 65.02050021532719 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9539028794693 + "value": 99.95307455416726 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -26128,7 +26128,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.375918425583322 + "value": 21.373269204175543 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -26140,13 +26140,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.001039747935177 + "value": 23.998383587991835 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.376055993996662 + "value": 24.37335833155421 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -26158,7 +26158,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.205507498912674 + "value": 47.20033563403627 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -26177,19 +26177,19 @@ "time" ], "times": { - "compilation_time": 23143.396, - "data": 77807.367, - "framework": 543511.4839999999, - "kernel_overhead": 176446.408, - "profiling_overhead": 64830.78, - "profiling_runs": 224426.929, + "compilation_time": 13999.331, + "data": 58876.247, + "framework": 497764.01600000006, + "kernel_overhead": 171167.444, + "profiling_overhead": 48697.512, + "profiling_runs": 219022.813, "runtimes": [ - 6838.304 + 6956.864 ], - "search_algorithm": 47.617, - "validation": 28.578 + "search_algorithm": 24.447, + "validation": 17.109 }, - "timestamp": "2026-03-05 08:59:21 UTC" + "timestamp": "2026-03-13 09:40:45 UTC" }, { "compilation_data": { @@ -26210,14 +26210,14 @@ "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -26225,49 +26225,49 @@ { "name": "time", "unit": "", - "value": 3652.224 + "value": 3690.112 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.583776723372605 + "value": 13.177031065194797 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6388.0 + "value": 9112.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869680.0 + "value": 1873268.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.71537686223448 + "value": 1.7178596261215822 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 68051.0 + "value": 74101.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100454.0 + "value": 2102140.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.68764072368542 + "value": 45.688046324691534 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -26279,7 +26279,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5352247977907167 + "value": 0.5352173087134678 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -26309,13 +26309,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.9308451133215 + "value": 73.13309305457548 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89637417377013 + "value": 99.89190925234382 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -26375,7 +26375,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.976209062225333 + "value": 28.97735510171988 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -26387,13 +26387,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.71989369573479 + "value": 45.72129750252451 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.306430184739803 + "value": 23.307145797185342 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -26405,7 +26405,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.88246048535434 + "value": 56.88429270678098 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -26424,19 +26424,19 @@ "time" ], "times": { - "compilation_time": 22928.35, - "data": 77940.253, - "framework": 360267.857, - "kernel_overhead": 93488.077, - "profiling_overhead": 64768.362, - "profiling_runs": 124071.165, + "compilation_time": 15310.708, + "data": 57917.926, + "framework": 319668.113, + "kernel_overhead": 91985.807, + "profiling_overhead": 47552.856, + "profiling_runs": 122211.524, "runtimes": [ - 3652.224 + 3690.112 ], - "search_algorithm": 43.324, - "validation": 27.699 + "search_algorithm": 34.395, + "validation": 18.469 }, - "timestamp": "2026-03-05 08:59:21 UTC" + "timestamp": "2026-03-13 09:40:45 UTC" }, { "compilation_data": { @@ -26457,14 +26457,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -26472,49 +26472,49 @@ { "name": "time", "unit": "", - "value": 2251.36 + "value": 2324.032 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.10725421681977 + "value": 20.908456951094394 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7880.0 + "value": 5884.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1874060.0 + "value": 1872224.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.808512695675129 + "value": 2.8008081369040703 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 46562.0 + "value": 46697.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104562.0 + "value": 2101073.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.926675254859596 + "value": 37.927957927795156 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -26526,7 +26526,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.4442008794367671 + "value": 0.4442090689069919 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -26556,13 +26556,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.6732455284196 + "value": 96.26798087956222 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90418523905078 + "value": 99.92508598010313 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -26622,7 +26622,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.23965031732271 + "value": 43.23194693631487 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -26634,13 +26634,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.88299052985222 + "value": 75.86851724956774 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.48947901303822 + "value": 19.485761754527655 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -26652,7 +26652,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.54588266337716 + "value": 64.53372491391114 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -26671,19 +26671,19 @@ "time" ], "times": { - "compilation_time": 23380.564, - "data": 79301.446, - "framework": 235776.941, - "kernel_overhead": 33754.572, - "profiling_overhead": 65442.291, - "profiling_runs": 57278.632, + "compilation_time": 14840.252, + "data": 57249.005, + "framework": 190940.385, + "kernel_overhead": 31589.186, + "profiling_overhead": 47343.124, + "profiling_runs": 54759.07, "runtimes": [ - 2251.36 + 2324.032 ], - "search_algorithm": 36.645, - "validation": 24.078 + "search_algorithm": 33.301, + "validation": 16.524 }, - "timestamp": "2026-03-05 08:59:21 UTC" + "timestamp": "2026-03-13 09:40:46 UTC" }, { "compilation_data": { @@ -26704,14 +26704,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -26719,49 +26719,49 @@ { "name": "time", "unit": "", - "value": 2009.088 + "value": 2008.544 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 32.736522346368716 + "value": 24.971648185483872 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3516.0 + "value": 488.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871632.0 + "value": 1870284.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.1707439514822613 + "value": 3.1572367538532986 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 37526.0 + "value": 35634.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099558.0 + "value": 2099304.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.53581128767231 + "value": 21.53325312940483 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -26773,7 +26773,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2521758581839152 + "value": 0.2521377487123544 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -26803,13 +26803,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.98813474912558 + "value": 98.14832081805851 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89026307310597 + "value": 99.90158339397559 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -26869,7 +26869,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.63178340669749 + "value": 43.620422469694994 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -26881,13 +26881,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.17058721440534 + "value": 86.14780197001382 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.23415370422179 + "value": 11.231183166989107 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -26899,7 +26899,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 61.725691971415586 + "value": 61.70953283958999 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -26918,19 +26918,19 @@ "time" ], "times": { - "compilation_time": 23496.191, - "data": 77507.701, - "framework": 228335.039, - "kernel_overhead": 32061.853, - "profiling_overhead": 64488.781, - "profiling_runs": 54276.704, + "compilation_time": 14767.924, + "data": 57261.038, + "framework": 186393.05, + "kernel_overhead": 30138.221, + "profiling_overhead": 47270.288, + "profiling_runs": 51723.503, "runtimes": [ - 2009.088 + 2008.544 ], - "search_algorithm": 47.791, - "validation": 28.562 + "search_algorithm": 26.089, + "validation": 15.662 }, - "timestamp": "2026-03-05 08:59:21 UTC" + "timestamp": "2026-03-13 09:40:46 UTC" }, { "compilation_data": { @@ -26951,14 +26951,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -26966,49 +26966,49 @@ { "name": "time", "unit": "", - "value": 1881.568 + "value": 1885.536 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.0687637467725 + "value": 26.541428882992257 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6568.0 + "value": 216.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871008.0 + "value": 1869048.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.378528909941584 + "value": 3.3561356533942335 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 37575.0 + "value": 31256.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100390.0 + "value": 2098991.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.45270438714915 + "value": 11.450177538596641 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -27020,7 +27020,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.13411828036090565 + "value": 0.13410566321405698 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -27050,13 +27050,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.25203583415436 + "value": 98.29599356027428 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9069409811896 + "value": 99.92646008795883 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -27116,7 +27116,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.78954299449883 + "value": 37.779530933833634 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -27128,13 +27128,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.64336180634677 + "value": 91.61684103177258 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.1528135978382235 + "value": 6.151033028256216 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -27146,7 +27146,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.50787591139473 + "value": 66.48880625743924 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -27165,19 +27165,19 @@ "time" ], "times": { - "compilation_time": 23565.287, - "data": 78677.587, - "framework": 237094.80099999998, - "kernel_overhead": 35672.401, - "profiling_overhead": 65580.285, - "profiling_runs": 57164.528, + "compilation_time": 15027.008, + "data": 57492.809, + "framework": 194400.51799999998, + "kernel_overhead": 34031.83, + "profiling_overhead": 47886.766, + "profiling_runs": 54989.113, "runtimes": [ - 1881.568 + 1885.536 ], - "search_algorithm": 35.836, - "validation": 30.09 + "search_algorithm": 20.941, + "validation": 15.338 }, - "timestamp": "2026-03-05 08:59:22 UTC" + "timestamp": "2026-03-13 09:40:46 UTC" }, { "compilation_data": { @@ -27198,14 +27198,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -27213,49 +27213,49 @@ { "name": "time", "unit": "", - "value": 1811.744 + "value": 1784.352 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.950265145922536 + "value": 27.78061540366518 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1508.0 + "value": 532.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868504.0 + "value": 1866112.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4884482477989205 + "value": 3.4999918462164867 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 29927.0 + "value": 29154.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099302.0 + "value": 2099634.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.931196964691672 + "value": 5.930354155752191 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -27267,7 +27267,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06944808665095645 + "value": 0.0694532209270332 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -27297,13 +27297,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.69814408800968 + "value": 81.7165734520455 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.88874205144946 + "value": 99.92027405535595 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -27363,7 +27363,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.64608182096902 + "value": 37.6418134163153 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -27375,13 +27375,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.92539969312452 + "value": 94.90245952138653 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.37198380257559 + "value": 3.371168911221128 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -27393,7 +27393,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.686422969761196 + "value": 58.67238615804046 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -27412,19 +27412,19 @@ "time" ], "times": { - "compilation_time": 24273.365, - "data": 77696.767, - "framework": 209756.816, - "kernel_overhead": 23103.973, - "profiling_overhead": 64590.114, - "profiling_runs": 44365.962, + "compilation_time": 15493.764, + "data": 57389.514, + "framework": 168132.216, + "kernel_overhead": 21212.9, + "profiling_overhead": 47844.708, + "profiling_runs": 41685.094, "runtimes": [ - 1811.744 + 1784.352 ], - "search_algorithm": 42.621, - "validation": 23.942 + "search_algorithm": 24.405, + "validation": 15.25 }, - "timestamp": "2026-03-05 08:59:22 UTC" + "timestamp": "2026-03-13 09:40:46 UTC" }, { "compilation_data": { @@ -27445,14 +27445,14 @@ "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -27460,49 +27460,49 @@ { "name": "time", "unit": "", - "value": 6219.296 + "value": 6246.688 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.706120208990015 + "value": 7.836189739637224 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5160.0 + "value": 22820.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870988.0 + "value": 1877204.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9985791993776746 + "value": 1.0214725938776774 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 109368.0 + "value": 133052.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100326.0 + "value": 2108568.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.28561285914418 + "value": 26.284808788742293 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -27514,7 +27514,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3079684004070406 + "value": 0.30795683278752845 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -27544,13 +27544,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.02300693607765 + "value": 98.28837896116802 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97085726752981 + "value": 99.97143879253004 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -27610,7 +27610,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.02510372501284 + "value": 43.02333002957223 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -27622,13 +27622,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.28763110207229 + "value": 26.286490803710784 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.400530698517318 + "value": 13.39994941361038 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -27640,7 +27640,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.91571439953724 + "value": 79.91229963766371 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -27659,19 +27659,19 @@ "time" ], "times": { - "compilation_time": 23506.024, - "data": 77793.292, - "framework": 2192436.9220000003, - "kernel_overhead": 1002712.253, - "profiling_overhead": 64599.224, - "profiling_runs": 1047332.153, + "compilation_time": 15765.185, + "data": 57693.839, + "framework": 2169476.482, + "kernel_overhead": 1009587.033, + "profiling_overhead": 47952.091, + "profiling_runs": 1054243.519, "runtimes": [ - 6219.296 + 6246.688 ], - "search_algorithm": 47.317, - "validation": 30.109 + "search_algorithm": 24.394, + "validation": 15.474 }, - "timestamp": "2026-03-05 08:59:23 UTC" + "timestamp": "2026-03-13 09:40:47 UTC" }, { "compilation_data": { @@ -27692,14 +27692,14 @@ "registers": 25 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -27707,49 +27707,49 @@ { "name": "time", "unit": "", - "value": 6230.464 + "value": 6392.16 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.348258072902029 + "value": 7.8096528910735845 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4740.0 + "value": 12836.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1880504.0 + "value": 1874356.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.043259619567179 + "value": 1.0190165212624498 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 111197.0 + "value": 118847.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2139178.0 + "value": 2109336.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.384641876047414 + "value": 13.383773483256151 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -27761,7 +27761,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.15681331055065298 + "value": 0.15681206483032908 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -27791,13 +27791,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.85156697691373 + "value": 98.79373301786582 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97233456374398 + "value": 99.97551994882552 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -27857,7 +27857,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.23404201167492 + "value": 36.232821485889374 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -27869,13 +27869,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.77021109633123 + "value": 26.769145498226433 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.875552263999135 + "value": 6.87527858011089 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -27887,7 +27887,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.50037668361028 + "value": 83.4971041779249 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -27906,19 +27906,19 @@ "time" ], "times": { - "compilation_time": 23758.457, - "data": 76694.62, - "framework": 2768185.393, - "kernel_overhead": 1292173.642, - "profiling_overhead": 63650.278, - "profiling_runs": 1335666.853, + "compilation_time": 14440.326, + "data": 58278.51, + "framework": 2754750.534, + "kernel_overhead": 1302327.149, + "profiling_overhead": 48199.578, + "profiling_runs": 1345945.297, "runtimes": [ - 6230.464 + 6392.16 ], - "search_algorithm": 46.036, - "validation": 31.555 + "search_algorithm": 30.127, + "validation": 15.414 }, - "timestamp": "2026-03-05 08:59:24 UTC" + "timestamp": "2026-03-13 09:40:49 UTC" }, { "compilation_data": { @@ -27939,14 +27939,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -27954,49 +27954,49 @@ { "name": "time", "unit": "", - "value": 8069.248 + "value": 8159.872 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.565581798672456 + "value": 6.19687734698657 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5436.0 + "value": 27536.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1934936.0 + "value": 1936888.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.79736433816041 + "value": 48.497669266090185 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 847850.0 + "value": 763505.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415199.0 + "value": 138421065.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.139789570159697 + "value": 5.128552136146367 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -28008,7 +28008,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05983973149257982 + "value": 0.05993807764419942 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -28038,13 +28038,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.2039972968962 + "value": 93.20929482105498 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.68922544314479 + "value": 99.30702062354234 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -28104,7 +28104,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.314619525187922 + "value": 12.38187895391247 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -28116,13 +28116,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.488969520365025 + "value": 20.60162887520626 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.015697922849071 + "value": 13.087265218087571 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -28134,7 +28134,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 32.08933150805714 + "value": 32.26583497650618 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -28153,19 +28153,19 @@ "time" ], "times": { - "compilation_time": 23476.114, - "data": 77612.241, - "framework": 953892.8030000001, - "kernel_overhead": 378516.706, - "profiling_overhead": 64645.08, - "profiling_runs": 433118.776, + "compilation_time": 14000.099, + "data": 58775.641, + "framework": 928984.1769999999, + "kernel_overhead": 383484.432, + "profiling_overhead": 48639.899, + "profiling_runs": 438084.205, "runtimes": [ - 8069.248 + 8159.872 ], - "search_algorithm": 45.023, - "validation": 26.201 + "search_algorithm": 25.951, + "validation": 14.318 }, - "timestamp": "2026-03-05 08:59:25 UTC" + "timestamp": "2026-03-13 09:40:49 UTC" }, { "compilation_data": { @@ -28186,14 +28186,14 @@ "registers": 34 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -28201,49 +28201,49 @@ { "name": "time", "unit": "", - "value": 8128.928 + "value": 8326.815 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.931978389342414 + "value": 6.1705179750351045 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6388.0 + "value": 6740.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2020848.0 + "value": 2029448.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.2846779232238 + "value": 54.24772351483889 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 17099308.0 + "value": 17211968.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415166.0 + "value": 138425089.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.547041290054688 + "value": 2.576918862712292 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -28255,7 +28255,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029837970206029144 + "value": 0.02992657613909857 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -28285,13 +28285,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.57056363845184 + "value": 88.78623432607272 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.13020392484891 + "value": 99.07521113847291 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -28351,7 +28351,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.087630709737056 + "value": 10.124178555010122 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -28363,13 +28363,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.548114352807506 + "value": 20.62057273748397 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.753962873200193 + "value": 11.795410625958237 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -28381,7 +28381,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.059115540481677 + "value": 29.16164362653782 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -28400,19 +28400,19 @@ "time" ], "times": { - "compilation_time": 23237.85, - "data": 78323.643, - "framework": 925576.61, - "kernel_overhead": 363636.012, - "profiling_overhead": 65227.697, - "profiling_runs": 418389.258, + "compilation_time": 14715.837, + "data": 59351.273, + "framework": 895376.1560000001, + "kernel_overhead": 365622.03, + "profiling_overhead": 49096.226, + "profiling_runs": 421306.627, "runtimes": [ - 8128.928 + 8326.815 ], - "search_algorithm": 43.257, - "validation": 34.214 + "search_algorithm": 26.506, + "validation": 16.315 }, - "timestamp": "2026-03-05 08:59:25 UTC" + "timestamp": "2026-03-13 09:40:50 UTC" }, { "compilation_data": { @@ -28433,14 +28433,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -28448,49 +28448,49 @@ { "name": "time", "unit": "", - "value": 9271.264 + "value": 9648.192 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.361336209678164 + "value": 5.675440261778125 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 23072.0 + "value": 6100.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2298752.0 + "value": 2299440.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 74.47694099175209 + "value": 74.71883278111405 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 106865410.0 + "value": 108341756.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138426126.0 + "value": 138417100.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1190299254804366 + "value": 1.128991814877712 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -28502,7 +28502,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012841988486605517 + "value": 0.013096344632014244 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -28532,13 +28532,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.22626648213634 + "value": 92.09760452940903 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 96.9623836936803 + "value": 99.53536486258409 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -28598,7 +28598,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.884199828722608 + "value": 7.8307165863572 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -28610,13 +28610,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 18.082883566927187 + "value": 17.96434452779269 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.772085638523762 + "value": 9.708026516667267 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -28628,7 +28628,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.19869194179137 + "value": 24.04011343549971 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -28647,19 +28647,19 @@ "time" ], "times": { - "compilation_time": 23628.984, - "data": 78136.155, - "framework": 907625.223, - "kernel_overhead": 351026.608, - "profiling_overhead": 64951.105, - "profiling_runs": 413511.355, + "compilation_time": 14392.754, + "data": 58429.02, + "framework": 877806.395, + "kernel_overhead": 353870.142, + "profiling_overhead": 48339.623, + "profiling_runs": 417167.61, "runtimes": [ - 9271.264 + 9648.192 ], - "search_algorithm": 44.972, - "validation": 31.883 + "search_algorithm": 27.976, + "validation": 15.225 }, - "timestamp": "2026-03-05 08:59:26 UTC" + "timestamp": "2026-03-13 09:40:50 UTC" }, { "compilation_data": { @@ -28680,14 +28680,14 @@ "registers": 26 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -28695,49 +28695,49 @@ { "name": "time", "unit": "", - "value": 3974.368 + "value": 4118.144 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.10353470225422 + "value": 12.185359198500507 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5812.0 + "value": 8892.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872360.0 + "value": 1871732.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5722327765400093 + "value": 1.55819996509876 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 72505.0 + "value": 76219.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100130.0 + "value": 2100796.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.896644579786024 + "value": 20.894930416119603 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -28749,7 +28749,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.24481447752490332 + "value": 0.24476922313857963 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -28779,13 +28779,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.71323469844259 + "value": 98.6122816945135 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9660497165348 + "value": 99.95412506755157 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -28845,7 +28845,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.61192487478861 + "value": 39.60904172433298 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -28857,13 +28857,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.79586064408222 + "value": 41.7931199812104 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.73467905214221 + "value": 10.733975151424156 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -28875,7 +28875,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.34741284248946 + "value": 83.34202787349986 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -28894,19 +28894,19 @@ "time" ], "times": { - "compilation_time": 23005.766, - "data": 78419.209, - "framework": 1408039.665, - "kernel_overhead": 616126.989, - "profiling_overhead": 65189.269, - "profiling_runs": 648304.198, + "compilation_time": 14649.823, + "data": 57822.417, + "framework": 1383010.094, + "kernel_overhead": 623065.003, + "profiling_overhead": 47501.27, + "profiling_runs": 654621.404, "runtimes": [ - 3974.368 + 4118.144 ], - "search_algorithm": 52.353, - "validation": 28.92 + "search_algorithm": 25.482, + "validation": 17.22 }, - "timestamp": "2026-03-05 08:59:27 UTC" + "timestamp": "2026-03-13 09:40:51 UTC" }, { "compilation_data": { @@ -28927,14 +28927,14 @@ "registers": 30 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -28942,49 +28942,49 @@ { "name": "time", "unit": "", - "value": 4189.664 + "value": 4286.4 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.160931210838624 + "value": 11.526096300970108 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 208.0 + "value": 476.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870452.0 + "value": 1870108.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.4753196535093334 + "value": 1.4779955857288525 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 70909.0 + "value": 72303.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099014.0 + "value": 2099100.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.818873582111486 + "value": 9.81808717905584 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -28996,7 +28996,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11503670539668939 + "value": 0.11502286837716381 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -29026,13 +29026,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.9485235904877 + "value": 98.92833054832015 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96939773678764 + "value": 99.96532619930389 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -29092,7 +29092,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.266983540925125 + "value": 43.263528391220646 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -29104,13 +29104,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.27788202957959 + "value": 39.27475712374755 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.12070044037976 + "value": 5.120293042988572 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -29122,7 +29122,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.9360298967861 + "value": 90.92887003521024 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -29141,19 +29141,19 @@ "time" ], "times": { - "compilation_time": 23162.434, - "data": 78210.365, - "framework": 1914000.427, - "kernel_overhead": 869198.536, - "profiling_overhead": 64361.405, - "profiling_runs": 902230.121, + "compilation_time": 14859.68, + "data": 58589.051, + "framework": 1896046.875, + "kernel_overhead": 878184.64, + "profiling_overhead": 48406.804, + "profiling_runs": 910866.38, "runtimes": [ - 4189.664 + 4286.4 ], - "search_algorithm": 46.706, - "validation": 34.87 + "search_algorithm": 25.964, + "validation": 15.025 }, - "timestamp": "2026-03-05 08:59:28 UTC" + "timestamp": "2026-03-13 09:40:52 UTC" }, { "compilation_data": { @@ -29174,14 +29174,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -29189,49 +29189,49 @@ { "name": "time", "unit": "", - "value": 6884.192 + "value": 7028.928 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.414195238822497 + "value": 7.110933968551393 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 18164.0 + "value": 13308.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1875472.0 + "value": 1871424.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9398529123279893 + "value": 0.9291264879970522 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 132228.0 + "value": 125969.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108635.0 + "value": 2102048.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.0412393122125994 + "value": 3.0411083640685184 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -29243,7 +29243,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.03563363094432863 + "value": 0.035631692523083104 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -29273,13 +29273,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.85747023008193 + "value": 98.84925407232556 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98186238615355 + "value": 99.97812555013023 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -29339,7 +29339,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.477650768245375 + "value": 36.47693967543911 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -29351,13 +29351,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.330304994762624 + "value": 24.329890792189442 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6335043636620412 + "value": 1.6334765546513907 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -29369,7 +29369,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.9778570826454 + "value": 87.97640682830651 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -29388,19 +29388,19 @@ "time" ], "times": { - "compilation_time": 23297.47, - "data": 77793.191, - "framework": 3137646.5700000003, - "kernel_overhead": 1474411.462, - "profiling_overhead": 64708.816, - "profiling_runs": 1520733.101, + "compilation_time": 14534.173, + "data": 59058.714, + "framework": 3133892.807, + "kernel_overhead": 1490003.832, + "profiling_overhead": 48707.027, + "profiling_runs": 1536123.234, "runtimes": [ - 6884.192 + 7028.928 ], - "search_algorithm": 45.189, - "validation": 27.508 + "search_algorithm": 24.24, + "validation": 14.313 }, - "timestamp": "2026-03-05 08:59:29 UTC" + "timestamp": "2026-03-13 09:40:54 UTC" }, { "compilation_data": { @@ -29421,14 +29421,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -29436,49 +29436,49 @@ { "name": "time", "unit": "", - "value": 9372.608 + "value": 9622.432 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.149235813789296 + "value": 6.0271896337156505 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 27184.0 + "value": 16820.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2277592.0 + "value": 2276868.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.86373883703486 + "value": 81.83999645906262 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 133176230.0 + "value": 133237647.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138429726.0 + "value": 138417423.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.117854555393024 + "value": 1.1020887542971285 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -29490,7 +29490,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01276277179235494 + "value": 0.012786754254545857 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -29520,13 +29520,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.89073272623502 + "value": 98.89438065367416 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.87694782352884 + "value": 98.83071464324571 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -29586,7 +29586,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.758059622644825 + "value": 7.698805569295245 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -29598,13 +29598,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.80341351187871 + "value": 17.664732944503108 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.170205169038019 + "value": 5.129931600948838 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -29616,7 +29616,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.148417865654356 + "value": 17.0148896951419 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -29635,19 +29635,19 @@ "time" ], "times": { - "compilation_time": 40445.627, - "data": 78500.053, - "framework": 587405.115, - "kernel_overhead": 190423.082, - "profiling_overhead": 65360.741, - "profiling_runs": 253121.239, + "compilation_time": 14543.556, + "data": 60412.184, + "framework": 557526.919, + "kernel_overhead": 192495.484, + "profiling_overhead": 50171.465, + "profiling_runs": 254447.786, "runtimes": [ - 9372.608 + 9622.432 ], - "search_algorithm": 53.216, - "validation": 25.378 + "search_algorithm": 35.059, + "validation": 17.073 }, - "timestamp": "2026-03-05 08:59:30 UTC" + "timestamp": "2026-03-13 09:40:54 UTC" }, { "compilation_data": { @@ -29668,14 +29668,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -29683,49 +29683,49 @@ { "name": "time", "unit": "", - "value": 2808.512 + "value": 2853.792 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 22.83924725309027 + "value": 17.237266289284538 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3080.0 + "value": 5772.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870016.0 + "value": 1872908.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.213543553120114 + "value": 2.2231514068603184 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 50927.0 + "value": 57304.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100006.0 + "value": 2107697.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.86013311184577 + "value": 14.85870108071237 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -29737,7 +29737,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.17407550014832116 + "value": 0.17403877674024437 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -29767,13 +29767,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.80407288238271 + "value": 98.84418834915762 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95424616971567 + "value": 99.94343128963116 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -29833,7 +29833,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.94868359175196 + "value": 45.94277900113985 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -29845,13 +29845,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.44496906755335 + "value": 59.4388595903316 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.749905635271849 + "value": 7.749109136044208 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -29863,7 +29863,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.39892525202075 + "value": 88.38995494257121 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -29882,19 +29882,19 @@ "time" ], "times": { - "compilation_time": 24516.89, - "data": 76415.591, - "framework": 1185206.882, - "kernel_overhead": 509670.947, - "profiling_overhead": 63125.336, - "profiling_runs": 535995.008, + "compilation_time": 15056.467, + "data": 58768.045, + "framework": 1160671.72, + "kernel_overhead": 513648.29, + "profiling_overhead": 48537.599, + "profiling_runs": 539717.786, "runtimes": [ - 2808.512 + 2853.792 ], - "search_algorithm": 46.881, - "validation": 25.841 + "search_algorithm": 37.615, + "validation": 17.587 }, - "timestamp": "2026-03-05 08:59:30 UTC" + "timestamp": "2026-03-13 09:40:55 UTC" }, { "compilation_data": { @@ -29915,14 +29915,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -29930,49 +29930,49 @@ { "name": "time", "unit": "", - "value": 3469.856 + "value": 3615.264 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.575869187447676 + "value": 13.93466396363775 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4064.0 + "value": 2488.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870476.0 + "value": 1869332.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8081179990979117 + "value": 1.7847536960234267 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 61419.0 + "value": 60632.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100454.0 + "value": 2099650.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.037895557232067 + "value": 6.037428798517981 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -29984,7 +29984,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07073613183538675 + "value": 0.07072681239183483 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -30014,13 +30014,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.72127137578069 + "value": 98.70608642477352 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9677258843067 + "value": 99.96459396518638 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -30080,7 +30080,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.22546223849266 + "value": 42.22124827013381 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -30092,13 +30092,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.30478927653387 + "value": 48.29993835248061 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.243119397228226 + "value": 3.2427937126299238 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -30110,7 +30110,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.41184901988609 + "value": 92.40266444487987 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -30129,19 +30129,19 @@ "time" ], "times": { - "compilation_time": 24854.452, - "data": 79090.156, - "framework": 1866341.435, - "kernel_overhead": 845952.838, - "profiling_overhead": 65870.211, - "profiling_runs": 875428.23, + "compilation_time": 14226.304, + "data": 57655.842, + "framework": 1844124.62, + "kernel_overhead": 854819.172, + "profiling_overhead": 47324.53, + "profiling_runs": 884325.076, "runtimes": [ - 3469.856 + 3615.264 ], - "search_algorithm": 45.143, - "validation": 28.093 + "search_algorithm": 25.862, + "validation": 16.271 }, - "timestamp": "2026-03-05 08:59:31 UTC" + "timestamp": "2026-03-13 09:40:55 UTC" }, { "compilation_data": { @@ -30162,14 +30162,14 @@ "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -30177,49 +30177,49 @@ { "name": "time", "unit": "", - "value": 5622.112 + "value": 5780.352 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.478284904543898 + "value": 8.68103626789729 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 15884.0 + "value": 10588.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871124.0 + "value": 1867516.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1375251465747094 + "value": 1.1262971238006843 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 108800.0 + "value": 102050.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106172.0 + "value": 2102171.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.8629646311329775 + "value": 1.862821978920939 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -30231,7 +30231,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02182724601615458 + "value": 0.021826389191589125 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -30261,13 +30261,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.90260667447761 + "value": 73.90563897840381 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97456874047766 + "value": 99.9777318830934 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -30327,7 +30327,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.34648464678536 + "value": 42.34344863971165 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -30339,13 +30339,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.80904737692261 + "value": 29.806934152526377 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0588907210308203 + "value": 1.0588156540997529 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -30357,7 +30357,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.4523744806733 + "value": 96.44556919790958 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -30376,19 +30376,19 @@ "time" ], "times": { - "compilation_time": 23752.737, - "data": 78275.375, - "framework": 3331901.351, - "kernel_overhead": 1574261.209, - "profiling_overhead": 65167.57, - "profiling_runs": 1614197.197, + "compilation_time": 14675.958, + "data": 57963.312, + "framework": 3325535.6059999997, + "kernel_overhead": 1589968.295, + "profiling_overhead": 47880.774, + "profiling_runs": 1629723.225, "runtimes": [ - 5622.112 + 5780.352 ], - "search_algorithm": 45.273, - "validation": 28.349 + "search_algorithm": 27.75, + "validation": 18.505 }, - "timestamp": "2026-03-05 08:59:33 UTC" + "timestamp": "2026-03-13 09:40:57 UTC" }, { "compilation_data": { @@ -30409,14 +30409,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -30424,49 +30424,49 @@ { "name": "time", "unit": "", - "value": 3345.248 + "value": 3436.736 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.243322083562255 + "value": 14.525552226704635 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 11340.0 + "value": 8940.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871636.0 + "value": 1871452.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8699403382849737 + "value": 1.8516979438790577 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 66733.0 + "value": 66562.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101228.0 + "value": 2101448.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.227395729805341 + "value": 6.226675273820059 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -30478,7 +30478,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07294971409031327 + "value": 0.07295366830553172 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -30508,13 +30508,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.73949215200489 + "value": 98.7372014713465 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95563439031659 + "value": 99.9748663317574 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -30574,7 +30574,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.773779162971366 + "value": 42.76827871426585 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -30586,13 +30586,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 49.82244218255398 + "value": 49.81555804033405 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3450125977056504 + "value": 3.3445504055400055 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -30604,7 +30604,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.86594019005594 + "value": 89.8536166277735 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -30623,19 +30623,19 @@ "time" ], "times": { - "compilation_time": 24416.304, - "data": 77202.174, - "framework": 1737263.253, - "kernel_overhead": 783892.958, - "profiling_overhead": 63493.578, - "profiling_runs": 812674.543, + "compilation_time": 15196.694, + "data": 57398.952, + "framework": 1719311.7880000002, + "kernel_overhead": 792956.828, + "profiling_overhead": 47394.221, + "profiling_runs": 821561.787, "runtimes": [ - 3345.248 + 3436.736 ], - "search_algorithm": 49.145, - "validation": 26.864 + "search_algorithm": 26.701, + "validation": 16.324 }, - "timestamp": "2026-03-05 08:59:34 UTC" + "timestamp": "2026-03-13 09:40:58 UTC" }, { "compilation_data": { @@ -30656,14 +30656,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -30671,49 +30671,49 @@ { "name": "time", "unit": "", - "value": 6299.328 + "value": 6490.752 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.209754037516996 + "value": 7.756123924982411 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 13740.0 + "value": 18312.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870056.0 + "value": 1872564.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.015314226136501 + "value": 1.0137894446124338 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 115303.0 + "value": 121957.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104184.0 + "value": 2106415.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6607401401324724 + "value": 1.66062747935063 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -30725,7 +30725,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0194570587558522 + "value": 0.01945603349680434 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -30755,13 +30755,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.03436036350742 + "value": 82.03314900107425 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96723964442859 + "value": 99.97223402511291 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -30821,7 +30821,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.247834443875774 + "value": 43.24343689193959 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -30833,13 +30833,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.57407664999056 + "value": 26.571348861688953 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9439765997494206 + "value": 0.9438797019960308 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -30851,7 +30851,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.28279058433442 + "value": 94.27315120469798 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -30870,19 +30870,19 @@ "time" ], "times": { - "compilation_time": 23571.895, - "data": 76591.537, - "framework": 3684132.2350000003, - "kernel_overhead": 1750433.176, - "profiling_overhead": 63351.996, - "profiling_runs": 1793755.526, + "compilation_time": 14907.995, + "data": 57675.395, + "framework": 3691663.864, + "kernel_overhead": 1771533.531, + "profiling_overhead": 47611.512, + "profiling_runs": 1814843.426, "runtimes": [ - 6299.328 + 6490.752 ], - "search_algorithm": 53.185, - "validation": 26.114 + "search_algorithm": 25.866, + "validation": 16.718 }, - "timestamp": "2026-03-05 08:59:36 UTC" + "timestamp": "2026-03-13 09:41:0 UTC" }, { "compilation_data": { @@ -30903,14 +30903,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 16, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -30918,49 +30918,49 @@ { "name": "time", "unit": "", - "value": 5934.016 + "value": 6031.904 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.770157098724763 + "value": 8.208810571454164 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12616.0 + "value": 6156.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870552.0 + "value": 1870780.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0668700972522296 + "value": 1.0663728645388455 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 108702.0 + "value": 102622.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101509.0 + "value": 2103339.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7422890190660063 + "value": 1.7421164611822262 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -30972,7 +30972,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.020412495876137613 + "value": 0.02041119861209165 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -31002,13 +31002,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.0262258476386 + "value": 82.01709649949342 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97345476426125 + "value": 99.96729254087963 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -31068,7 +31068,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.0617771643758 + "value": 44.06157599950532 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -31080,13 +31080,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.877261120901935 + "value": 27.877207764709343 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9902689192117266 + "value": 0.9902670238684593 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -31098,7 +31098,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.98614256816603 + "value": 94.98600407344368 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -31117,19 +31117,19 @@ "time" ], "times": { - "compilation_time": 23440.068, - "data": 77234.746, - "framework": 3597758.283, - "kernel_overhead": 1707234.07, - "profiling_overhead": 64199.72, - "profiling_runs": 1749089.747, + "compilation_time": 15835.649, + "data": 58642.32, + "framework": 3600016.89, + "kernel_overhead": 1725655.225, + "profiling_overhead": 48519.537, + "profiling_runs": 1767199.808, "runtimes": [ - 5934.016 + 6031.904 ], - "search_algorithm": 53.649, - "validation": 29.8 + "search_algorithm": 76.473, + "validation": 16.496 }, - "timestamp": "2026-03-05 08:59:38 UTC" + "timestamp": "2026-03-13 09:41:2 UTC" }, { "compilation_data": { @@ -31150,14 +31150,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 1 }, "correctness": 1, "invalidity": "correct", @@ -31165,49 +31165,49 @@ { "name": "time", "unit": "", - "value": 6841.664 + "value": 6998.944 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.343634285678053 + "value": 7.104357300575638 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 14672.0 + "value": 14648.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871588.0 + "value": 1871560.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.926818620888463 + "value": 0.9286597821081216 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 129072.0 + "value": 129798.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102179.0 + "value": 2102280.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.01261161198408 + "value": 48.012876110896066 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -31219,7 +31219,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5624837838318012 + "value": 0.562476783757018 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -31249,13 +31249,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.07431401675503 + "value": 80.02497749666871 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93492719378561 + "value": 99.94021370415773 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -31315,7 +31315,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.387068601207 + "value": 21.387433625700748 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -31327,13 +31327,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.01493530246874 + "value": 24.01336614242304 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.390168666569814 + "value": 24.3885749883984 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -31345,7 +31345,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.23289128404697 + "value": 47.229819978665525 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -31364,19 +31364,19 @@ "time" ], "times": { - "compilation_time": 23885.913, - "data": 78423.869, - "framework": 542079.759, - "kernel_overhead": 175334.942, - "profiling_overhead": 65430.743, - "profiling_runs": 222890.205, + "compilation_time": 14187.319, + "data": 59699.046, + "framework": 508043.09199999995, + "kernel_overhead": 175445.973, + "profiling_overhead": 49572.419, + "profiling_runs": 223325.654, "runtimes": [ - 6841.664 + 6998.944 ], - "search_algorithm": 43.587, - "validation": 33.794 + "search_algorithm": 34.594, + "validation": 17.037 }, - "timestamp": "2026-03-05 08:59:38 UTC" + "timestamp": "2026-03-13 09:41:2 UTC" }, { "compilation_data": { @@ -31397,14 +31397,14 @@ "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -31412,49 +31412,49 @@ { "name": "time", "unit": "", - "value": 3655.872 + "value": 3703.584 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.655507844360216 + "value": 13.380200914867974 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9828.0 + "value": 5572.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1874300.0 + "value": 1870208.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.7245464712907321 + "value": 1.7099487647300613 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 73623.0 + "value": 68363.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109065.0 + "value": 2101230.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.73379134048475 + "value": 45.733652647907455 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -31466,7 +31466,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5357223726053081 + "value": 0.5357640371006132 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -31496,13 +31496,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.42922150455854 + "value": 92.89026769659606 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94332534523667 + "value": 99.94642628726379 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -31562,7 +31562,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.9896728953351 + "value": 28.991046906449906 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -31574,13 +31574,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.74089929241257 + "value": 45.74303740937751 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.31713811585875 + "value": 23.318228054389706 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -31592,7 +31592,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.90869106895323 + "value": 56.91137268541131 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -31611,19 +31611,19 @@ "time" ], "times": { - "compilation_time": 23908.53, - "data": 77494.562, - "framework": 359527.917, - "kernel_overhead": 93523.173, - "profiling_overhead": 64515.821, - "profiling_runs": 123994.361, + "compilation_time": 14637.224, + "data": 57699.907, + "framework": 320098.083, + "kernel_overhead": 91968.069, + "profiling_overhead": 47623.558, + "profiling_runs": 122806.549, "runtimes": [ - 3655.872 + 3703.584 ], - "search_algorithm": 47.967, - "validation": 27.149 + "search_algorithm": 24.719, + "validation": 15.862 }, - "timestamp": "2026-03-05 08:59:39 UTC" + "timestamp": "2026-03-13 09:41:2 UTC" }, { "compilation_data": { @@ -31644,14 +31644,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -31659,49 +31659,49 @@ { "name": "time", "unit": "", - "value": 2197.824 + "value": 2230.272 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 29.24564779962359 + "value": 22.278007856512076 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 152.0 + "value": 2400.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869548.0 + "value": 1870464.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.8108240107389557 + "value": 2.813576136040202 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 38219.0 + "value": 41646.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098982.0 + "value": 2099625.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.92505617563906 + "value": 37.92384313564955 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -31713,7 +31713,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.4441536353015392 + "value": 0.44409889765856914 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -31743,13 +31743,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.7849438996359 + "value": 94.67073182844801 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89092078022193 + "value": 99.88996858455674 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -31809,7 +31809,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.24111685991236 + "value": 43.23574401996685 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -31821,13 +31821,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.88499518547295 + "value": 75.87636637363696 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.48999388064393 + "value": 19.487777691666526 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -31839,7 +31839,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.54775036729394 + "value": 64.54044886226517 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -31858,19 +31858,19 @@ "time" ], "times": { - "compilation_time": 24033.066, - "data": 79707.738, - "framework": 234429.956, - "kernel_overhead": 33278.098, - "profiling_overhead": 65145.16, - "profiling_runs": 56298.96, + "compilation_time": 15150.569, + "data": 56475.902, + "framework": 190581.41199999998, + "kernel_overhead": 32275.433, + "profiling_overhead": 46833.468, + "profiling_runs": 54996.609, "runtimes": [ - 2197.824 + 2230.272 ], - "search_algorithm": 33.244, - "validation": 23.967 + "search_algorithm": 24.929, + "validation": 16.73 }, - "timestamp": "2026-03-05 08:59:39 UTC" + "timestamp": "2026-03-13 09:41:3 UTC" }, { "compilation_data": { @@ -31891,14 +31891,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -31906,49 +31906,49 @@ { "name": "time", "unit": "", - "value": 1968.736 + "value": 2183.776 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 32.87902373780515 + "value": 24.798216878862586 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 476.0 + "value": 7564.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871724.0 + "value": 1872244.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.162290341690338 + "value": 3.149764851502613 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 34083.0 + "value": 41956.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099167.0 + "value": 2100609.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.537184734534222 + "value": 21.53424425723229 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -31960,7 +31960,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.25218463042627676 + "value": 0.2521515630349666 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -31990,13 +31990,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.76032997904697 + "value": 97.88676374506385 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89631702027687 + "value": 99.91062963094825 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -32056,7 +32056,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.630435741916685 + "value": 43.61975861920062 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -32068,13 +32068,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.168362444597 + "value": 86.14472137134345 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.233863658548533 + "value": 11.230781545971046 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -32086,7 +32086,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 61.724280012127075 + "value": 61.70738775567418 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -32105,19 +32105,19 @@ "time" ], "times": { - "compilation_time": 24292.315, - "data": 77453.765, - "framework": 229225.332, - "kernel_overhead": 32747.389, - "profiling_overhead": 64498.081, - "profiling_runs": 54526.097, + "compilation_time": 14990.453, + "data": 57170.313, + "framework": 186827.064, + "kernel_overhead": 30286.394, + "profiling_overhead": 47208.536, + "profiling_runs": 52161.821, "runtimes": [ - 1968.736 + 2183.776 ], - "search_algorithm": 34.994, - "validation": 23.032 + "search_algorithm": 38.819, + "validation": 17.218 }, - "timestamp": "2026-03-05 08:59:39 UTC" + "timestamp": "2026-03-13 09:41:3 UTC" }, { "compilation_data": { @@ -32138,14 +32138,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -32153,49 +32153,49 @@ { "name": "time", "unit": "", - "value": 1841.408 + "value": 1925.696 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.14250036064628 + "value": 26.28391298883183 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 476.0 + "value": 3048.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870432.0 + "value": 1870688.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3761854452145563 + "value": 3.3730135376728914 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 30368.0 + "value": 32367.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099167.0 + "value": 2099464.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.452016912643977 + "value": 11.449363319319117 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -32207,7 +32207,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1340868151020513 + "value": 0.13406558805402294 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -32237,13 +32237,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.13247514005076 + "value": 98.13291401373382 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.88895731024118 + "value": 99.89284088068698 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -32303,7 +32303,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.786602839445685 + "value": 37.78015555554653 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -32315,13 +32315,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.63835680591511 + "value": 91.62028760485575 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.152477568756507 + "value": 6.15126442659554 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -32333,7 +32333,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.50443892667478 + "value": 66.49136896443741 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -32352,19 +32352,19 @@ "time" ], "times": { - "compilation_time": 23331.835, - "data": 78515.535, - "framework": 236532.50900000002, - "kernel_overhead": 35924.33, - "profiling_overhead": 64574.527, - "profiling_runs": 57518.117, + "compilation_time": 15482.669, + "data": 56967.527, + "framework": 192678.343, + "kernel_overhead": 33704.686, + "profiling_overhead": 47266.098, + "profiling_runs": 54740.032, "runtimes": [ - 1841.408 + 1925.696 ], - "search_algorithm": 34.155, - "validation": 26.526 + "search_algorithm": 33.7, + "validation": 14.178 }, - "timestamp": "2026-03-05 08:59:39 UTC" + "timestamp": "2026-03-13 09:41:3 UTC" }, { "compilation_data": { @@ -32385,14 +32385,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -32400,49 +32400,49 @@ { "name": "time", "unit": "", - "value": 1788.96 + "value": 1810.016 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 36.09972508420844 + "value": 27.68950941038361 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 176.0 + "value": 2440.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1865504.0 + "value": 1869424.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4758003920287486 + "value": 3.496568884445208 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 28719.0 + "value": 32650.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098886.0 + "value": 2103205.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.931386357111656 + "value": 5.930355581196915 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -32454,7 +32454,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06946359385233628 + "value": 0.06944478980970048 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -32484,13 +32484,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.69054563092972 + "value": 81.64995754438033 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93016649046385 + "value": 99.9127164667793 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -32550,7 +32550,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.64159308588437 + "value": 37.63968897043487 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -32562,13 +32562,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.90723719425968 + "value": 94.89811678279898 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3713386259191362 + "value": 3.3710146464592903 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -32580,7 +32580,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.67533990095885 + "value": 58.669701308388746 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -32599,19 +32599,19 @@ "time" ], "times": { - "compilation_time": 24107.009, - "data": 79605.99, - "framework": 212635.928, - "kernel_overhead": 23391.2, - "profiling_overhead": 65211.905, - "profiling_runs": 44426.833, + "compilation_time": 15483.959, + "data": 58135.221, + "framework": 168817.28900000002, + "kernel_overhead": 20869.132, + "profiling_overhead": 48296.924, + "profiling_runs": 41516.012, "runtimes": [ - 1788.96 + 1810.016 ], - "search_algorithm": 36.408, - "validation": 28.284 + "search_algorithm": 30.763, + "validation": 16.044 }, - "timestamp": "2026-03-05 08:59:39 UTC" + "timestamp": "2026-03-13 09:41:3 UTC" }, { "compilation_data": { @@ -32632,14 +32632,14 @@ "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -32647,49 +32647,49 @@ { "name": "time", "unit": "", - "value": 6371.104 + "value": 6973.472 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.80612381334403 + "value": 7.452893973067049 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4120.0 + "value": 6644.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1873384.0 + "value": 1873460.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0052188760079641 + "value": 1.0112051487471492 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 114493.0 + "value": 115719.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108063.0 + "value": 2109299.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.28562253711572 + "value": 26.284687719150483 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -32701,7 +32701,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.30797007918299707 + "value": 0.3079546910884327 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -32731,13 +32731,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.46194986627934 + "value": 97.75577419366488 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97126927209614 + "value": 99.97101177231865 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -32797,7 +32797,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.025850070858766 + "value": 43.023644187259244 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -32809,13 +32809,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.287666061424137 + "value": 26.2864202735711 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.400548519593164 + "value": 13.399913459769644 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -32827,7 +32827,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.91587590977844 + "value": 79.91210167369344 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -32846,19 +32846,19 @@ "time" ], "times": { - "compilation_time": 25042.663, - "data": 78994.417, - "framework": 2192373.795, - "kernel_overhead": 1001216.611, - "profiling_overhead": 65534.524, - "profiling_runs": 1046628.243, + "compilation_time": 15553.882, + "data": 57333.913, + "framework": 2169511.873, + "kernel_overhead": 1009381.208, + "profiling_overhead": 47911.838, + "profiling_runs": 1054884.914, "runtimes": [ - 6371.104 + 6973.472 ], - "search_algorithm": 60.624, - "validation": 30.401 + "search_algorithm": 26.78, + "validation": 18.146 }, - "timestamp": "2026-03-05 08:59:40 UTC" + "timestamp": "2026-03-13 09:41:4 UTC" }, { "compilation_data": { @@ -32879,14 +32879,14 @@ "registers": 25 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -32894,49 +32894,49 @@ { "name": "time", "unit": "", - "value": 6182.72 + "value": 6284.32 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.344568518284197 + "value": 7.799456196446572 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 11580.0 + "value": 24620.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872012.0 + "value": 1876840.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0271626218029224 + "value": 1.036195896537624 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 114214.0 + "value": 134866.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101611.0 + "value": 2105399.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.384725913150739 + "value": 13.38374898026212 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -32948,7 +32948,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.15681296534906278 + "value": 0.15680651186000782 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -32978,13 +32978,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.53639400559108 + "value": 98.52430640210879 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97048999377782 + "value": 99.97073087476257 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -33044,7 +33044,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.234851205628885 + "value": 36.23257970716396 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -33056,13 +33056,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.770646105571515 + "value": 26.769479883367143 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.875663990005185 + "value": 6.875364462231989 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -33074,7 +33074,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.50178769640068 + "value": 83.4981651272205 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -33093,19 +33093,19 @@ "time" ], "times": { - "compilation_time": 23303.694, - "data": 78629.996, - "framework": 2769953.807, - "kernel_overhead": 1291172.967, - "profiling_overhead": 65370.55, - "profiling_runs": 1334780.294, + "compilation_time": 14112.929, + "data": 58256.772, + "framework": 2749601.722, + "kernel_overhead": 1299832.146, + "profiling_overhead": 48085.475, + "profiling_runs": 1343427.329, "runtimes": [ - 6182.72 + 6284.32 ], - "search_algorithm": 46.017, - "validation": 27.693 + "search_algorithm": 25.525, + "validation": 14.695 }, - "timestamp": "2026-03-05 08:59:42 UTC" + "timestamp": "2026-03-13 09:41:6 UTC" }, { "compilation_data": { @@ -33126,14 +33126,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -33141,49 +33141,49 @@ { "name": "time", "unit": "", - "value": 8192.416 + "value": 8178.015 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.804129292473163 + "value": 6.189229409613588 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 13408.0 + "value": 18104.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1933264.0 + "value": 1931632.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.82638483225434 + "value": 48.85579725510688 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 926990.0 + "value": 927674.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138424519.0 + "value": 138418121.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.143515509972154 + "value": 5.095351362183663 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -33195,7 +33195,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.059432383016558114 + "value": 0.059257180285558556 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -33225,13 +33225,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.76851472130176 + "value": 97.74065293890729 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.0172613032649 + "value": 97.85739577279008 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -33291,7 +33291,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.440640986841089 + "value": 12.423106915801053 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -33303,13 +33303,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.696613161042734 + "value": 20.669312432722727 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.147604356697556 + "value": 13.130261462388804 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -33321,7 +33321,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 32.414600206754365 + "value": 32.371860706101536 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -33340,19 +33340,19 @@ "time" ], "times": { - "compilation_time": 23659.143, - "data": 77654.39, - "framework": 953822.304, - "kernel_overhead": 378143.405, - "profiling_overhead": 64397.213, - "profiling_runs": 433627.296, + "compilation_time": 14245.559, + "data": 59050.322, + "framework": 924300.7679999999, + "kernel_overhead": 381068.506, + "profiling_overhead": 48601.672, + "profiling_runs": 435580.268, "runtimes": [ - 8192.416 + 8178.015 ], - "search_algorithm": 44.966, - "validation": 28.464 + "search_algorithm": 25.27, + "validation": 16.187 }, - "timestamp": "2026-03-05 08:59:42 UTC" + "timestamp": "2026-03-13 09:41:6 UTC" }, { "compilation_data": { @@ -33373,14 +33373,14 @@ "registers": 34 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -33388,49 +33388,49 @@ { "name": "time", "unit": "", - "value": 8160.832 + "value": 8367.039 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.903214714414323 + "value": 6.139049490767765 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3244.0 + "value": 22592.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2019372.0 + "value": 2023564.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 54.39060288792162 + "value": 54.50557589469205 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 18257687.0 + "value": 18228045.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138422731.0 + "value": 138421860.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.5813156027406032 + "value": 2.53485035165659 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -33442,7 +33442,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029615866449795362 + "value": 0.029559202008351482 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -33472,13 +33472,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.06993871647624 + "value": 90.0053258015499 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.8810767778805 + "value": 97.9527473465697 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -33538,7 +33538,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.142650026234906 + "value": 10.114389765273362 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -33550,13 +33550,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.655437695691393 + "value": 20.60083300468511 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.815354131104721 + "value": 11.784119074701469 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -33568,7 +33568,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.21095339406554 + "value": 29.13374930985101 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -33587,19 +33587,19 @@ "time" ], "times": { - "compilation_time": 23637.008, - "data": 77822.11, - "framework": 921351.403, - "kernel_overhead": 362018.064, - "profiling_overhead": 64434.214, - "profiling_runs": 417077.015, + "compilation_time": 14794.988, + "data": 58639.431, + "framework": 888559.701, + "kernel_overhead": 363191.664, + "profiling_overhead": 48385.595, + "profiling_runs": 418343.011, "runtimes": [ - 8160.832 + 8367.039 ], - "search_algorithm": 45.585, - "validation": 29.084 + "search_algorithm": 32.72, + "validation": 18.514 }, - "timestamp": "2026-03-05 08:59:43 UTC" + "timestamp": "2026-03-13 09:41:7 UTC" }, { "compilation_data": { @@ -33620,14 +33620,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -33635,49 +33635,49 @@ { "name": "time", "unit": "", - "value": 9528.736 + "value": 9763.488 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.409399355838025 + "value": 5.6436501930093925 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 17908.0 + "value": 14988.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2296556.0 + "value": 2309264.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 76.80966690322954 + "value": 77.08855189866334 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 120883884.0 + "value": 121628827.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138417824.0 + "value": 138421568.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0926345259812185 + "value": 1.100986716568427 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -33689,7 +33689,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012761413473597551 + "value": 0.012832633922758657 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -33719,13 +33719,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.07571537404945 + "value": 95.31394108218996 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.99409582633444 + "value": 99.45515036216894 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -33785,7 +33785,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.750469989037202 + "value": 7.678263995499303 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -33797,13 +33797,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.78023772659733 + "value": 17.61680796359352 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.608534230425583 + "value": 9.520215924661683 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -33815,7 +33815,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.793744680633978 + "value": 23.575054109401993 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -33834,19 +33834,19 @@ "time" ], "times": { - "compilation_time": 23366.86, - "data": 80774.904, - "framework": 916249.8180000001, - "kernel_overhead": 351879.346, - "profiling_overhead": 66747.15, - "profiling_runs": 416848.418, + "compilation_time": 14117.891, + "data": 59458.56, + "framework": 878308.6239999998, + "kernel_overhead": 352937.437, + "profiling_overhead": 48879.404, + "profiling_runs": 417033.223, "runtimes": [ - 9528.736 + 9763.488 ], - "search_algorithm": 45.105, - "validation": 33.43 + "search_algorithm": 24.945, + "validation": 16.943 }, - "timestamp": "2026-03-05 08:59:44 UTC" + "timestamp": "2026-03-13 09:41:7 UTC" }, { "compilation_data": { @@ -33867,14 +33867,14 @@ "registers": 26 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -33882,49 +33882,49 @@ { "name": "time", "unit": "", - "value": 3981.248 + "value": 4178.88 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 16.085597796264185 + "value": 11.63195267558 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6156.0 + "value": 10188.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1869924.0 + "value": 1874032.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5726548030608558 + "value": 1.5635577916351944 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 72707.0 + "value": 81318.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100509.0 + "value": 2102345.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.896618161860626 + "value": 20.89477701003576 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -33936,7 +33936,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2448150140457833 + "value": 0.24479242123620076 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -33966,13 +33966,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.42068166385242 + "value": 98.55281582316502 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96517318151933 + "value": 99.96530635060329 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -34032,7 +34032,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.61348382156134 + "value": 39.60907409323793 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -34044,13 +34044,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.79631872520104 + "value": 41.792405868411365 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.734796703835814 + "value": 10.733791741593935 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -34062,7 +34062,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.34841414384464 + "value": 83.34063246746459 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -34081,19 +34081,19 @@ "time" ], "times": { - "compilation_time": 23496.392, - "data": 78419.608, - "framework": 1406853.398, - "kernel_overhead": 615962.234, - "profiling_overhead": 64140.026, - "profiling_runs": 648331.53, + "compilation_time": 14371.914, + "data": 57771.465, + "framework": 1377157.131, + "kernel_overhead": 619625.518, + "profiling_overhead": 47592.833, + "profiling_runs": 652167.315, "runtimes": [ - 3981.248 + 4178.88 ], - "search_algorithm": 45.286, - "validation": 28.348 + "search_algorithm": 30.871, + "validation": 16.35 }, - "timestamp": "2026-03-05 08:59:44 UTC" + "timestamp": "2026-03-13 09:41:8 UTC" }, { "compilation_data": { @@ -34114,14 +34114,14 @@ "registers": 30 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -34129,49 +34129,49 @@ { "name": "time", "unit": "", - "value": 4196.704 + "value": 4284.288 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.179710678644783 + "value": 11.526607827362643 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 444.0 + "value": 432.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1872572.0 + "value": 1870176.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.475595775622297 + "value": 1.4774591308641016 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 71327.0 + "value": 71443.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099158.0 + "value": 2099172.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.819050462580064 + "value": 9.818085455382157 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -34183,7 +34183,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11503467539986556 + "value": 0.11502635693049244 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -34213,13 +34213,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.83762941254982 + "value": 98.80609597830465 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96514096756164 + "value": 99.96616528565768 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -34279,7 +34279,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.268291413752344 + "value": 43.26458975628555 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -34291,13 +34291,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.27886143420288 + "value": 39.27561862564293 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.120828126431723 + "value": 5.12040535793294 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -34309,7 +34309,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.93838081665545 + "value": 90.93089209021228 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -34328,19 +34328,19 @@ "time" ], "times": { - "compilation_time": 22912.046, - "data": 82830.595, - "framework": 1923826.2880000002, - "kernel_overhead": 869312.971, - "profiling_overhead": 69202.54, - "profiling_runs": 902480.182, + "compilation_time": 16370.808, + "data": 58272.684, + "framework": 1888064.4, + "kernel_overhead": 874479.164, + "profiling_overhead": 48256.965, + "profiling_runs": 907055.587, "runtimes": [ - 4196.704 + 4284.288 ], - "search_algorithm": 48.857, - "validation": 31.264 + "search_algorithm": 25.849, + "validation": 16.162 }, - "timestamp": "2026-03-05 08:59:45 UTC" + "timestamp": "2026-03-13 09:41:9 UTC" }, { "compilation_data": { @@ -34361,14 +34361,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -34376,49 +34376,49 @@ { "name": "time", "unit": "", - "value": 6848.352 + "value": 6945.472 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.395725791953979 + "value": 7.1399684236094245 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1256.0 + "value": 10584.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871924.0 + "value": 1870712.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9368606445392504 + "value": 0.9322472814983518 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 114090.0 + "value": 120599.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103030.0 + "value": 2101877.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.041182419994222 + "value": 3.041054828191242 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -34430,7 +34430,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.03561599557026055 + "value": 0.03563163466300399 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -34460,13 +34460,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.81070698634723 + "value": 98.82279811846546 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93100478714572 + "value": 99.98081860056001 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -34526,7 +34526,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.47814570904168 + "value": 36.47595619079078 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -34538,13 +34538,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.33063995278877 + "value": 24.329195943531854 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6335268522990505 + "value": 1.6334299034353663 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -34556,7 +34556,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.9791211259898 + "value": 87.97391058321715 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -34575,19 +34575,19 @@ "time" ], "times": { - "compilation_time": 24765.465, - "data": 78507.175, - "framework": 3142472.9299999997, - "kernel_overhead": 1476527.433, - "profiling_overhead": 65013.92, - "profiling_runs": 1522424.402, + "compilation_time": 14764.134, + "data": 58034.914, + "framework": 3120138.353, + "kernel_overhead": 1484122.298, + "profiling_overhead": 48005.368, + "profiling_runs": 1529975.773, "runtimes": [ - 6848.352 + 6945.472 ], - "search_algorithm": 44.8, - "validation": 27.653 + "search_algorithm": 26.452, + "validation": 16.898 }, - "timestamp": "2026-03-05 08:59:47 UTC" + "timestamp": "2026-03-13 09:41:11 UTC" }, { "compilation_data": { @@ -34608,14 +34608,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -34623,49 +34623,49 @@ { "name": "time", "unit": "", - "value": 9381.056 + "value": 9580.96 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.304740008090986 + "value": 6.315348934537718 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 28736.0 + "value": 28540.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2283580.0 + "value": 2289284.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.82681012099506 + "value": 81.76639960739128 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 133095550.0 + "value": 133101522.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425315.0 + "value": 138421595.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1136555259985612 + "value": 1.108071134310571 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -34677,7 +34677,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012744580193030606 + "value": 0.01286605243691179 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -34707,13 +34707,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.66069475091538 + "value": 98.7460235550571 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.28106709484224 + "value": 98.18829133473254 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -34773,7 +34773,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.79437417500793 + "value": 7.796475830097011 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -34785,13 +34785,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.88693388809159 + "value": 17.890575364678316 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.194459926729723 + "value": 5.195517430733608 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -34803,7 +34803,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.22891959622547 + "value": 17.232442707789907 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -34822,19 +34822,19 @@ "time" ], "times": { - "compilation_time": 23525.954, - "data": 78876.863, - "framework": 588241.0009999999, - "kernel_overhead": 190636.648, - "profiling_overhead": 65613.933, - "profiling_runs": 253113.557, + "compilation_time": 14054.273, + "data": 58866.964, + "framework": 551568.298, + "kernel_overhead": 191131.968, + "profiling_overhead": 48635.01, + "profiling_runs": 252934.356, "runtimes": [ - 9381.056 + 9580.96 ], - "search_algorithm": 39.339, - "validation": 26.655 + "search_algorithm": 34.668, + "validation": 13.016 }, - "timestamp": "2026-03-05 08:59:47 UTC" + "timestamp": "2026-03-13 09:41:11 UTC" }, { "compilation_data": { @@ -34855,14 +34855,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -34870,49 +34870,49 @@ { "name": "time", "unit": "", - "value": 2818.336 + "value": 2858.432 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 22.871357727003154 + "value": 17.34734664214523 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5704.0 + "value": 488.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870084.0 + "value": 1870916.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.216115674587332 + "value": 2.200680532479743 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 52990.0 + "value": 48572.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100509.0 + "value": 2099246.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.860375956736766 + "value": 14.858901635311028 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -34924,7 +34924,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.17406985317814183 + "value": 0.17406320797727376 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -34954,13 +34954,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.65697477473694 + "value": 98.70357184047693 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95011698328501 + "value": 99.95780201358325 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -35020,7 +35020,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.94805106011678 + "value": 45.94308334736106 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -35032,13 +35032,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.44549642505709 + "value": 59.438656905943574 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.749974387446408 + "value": 7.749082711858855 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -35050,7 +35050,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.39983990545713 + "value": 88.38969516411753 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -35069,19 +35069,19 @@ "time" ], "times": { - "compilation_time": 24080.61, - "data": 78081.335, - "framework": 1189883.438, - "kernel_overhead": 510516.2, - "profiling_overhead": 64542.718, - "profiling_runs": 536743.185, + "compilation_time": 14430.771, + "data": 58779.781, + "framework": 1155892.145, + "kernel_overhead": 511364.765, + "profiling_overhead": 48573.338, + "profiling_runs": 537174.261, "runtimes": [ - 2818.336 + 2858.432 ], - "search_algorithm": 44.337, - "validation": 27.077 + "search_algorithm": 24.872, + "validation": 12.471 }, - "timestamp": "2026-03-05 08:59:48 UTC" + "timestamp": "2026-03-13 09:41:12 UTC" }, { "compilation_data": { @@ -35102,14 +35102,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -35117,49 +35117,49 @@ { "name": "time", "unit": "", - "value": 3460.16 + "value": 3538.432 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.543733035691638 + "value": 13.944986072423399 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2608.0 + "value": 11012.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868784.0 + "value": 1872940.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8044140573676066 + "value": 1.8130216733063476 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 57885.0 + "value": 67561.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099618.0 + "value": 2104761.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.037738847581255 + "value": 6.037114045470682 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -35171,7 +35171,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07073389641076132 + "value": 0.07072536319269761 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -35201,13 +35201,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.63925371428081 + "value": 98.62355961730196 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96874307800208 + "value": 99.96455404141808 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -35267,7 +35267,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.2236017323822 + "value": 42.22021019097227 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -35279,13 +35279,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.30277124260986 + "value": 48.29896797173099 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.2429839091107695 + "value": 3.242728562555181 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -35297,7 +35297,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.40809178755784 + "value": 92.40083823626507 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -35316,19 +35316,19 @@ "time" ], "times": { - "compilation_time": 23735.345, - "data": 77464.301, - "framework": 1864602.94, - "kernel_overhead": 846684.464, - "profiling_overhead": 64346.101, - "profiling_runs": 876108.074, + "compilation_time": 14746.37, + "data": 59192.82, + "framework": 1840731.734, + "kernel_overhead": 851699.715, + "profiling_overhead": 48942.643, + "profiling_runs": 880896.556, "runtimes": [ - 3460.16 + 3538.432 ], - "search_algorithm": 46.294, - "validation": 31.036 + "search_algorithm": 25.644, + "validation": 17.5 }, - "timestamp": "2026-03-05 08:59:49 UTC" + "timestamp": "2026-03-13 09:41:13 UTC" }, { "compilation_data": { @@ -35349,14 +35349,14 @@ "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -35364,49 +35364,49 @@ { "name": "time", "unit": "", - "value": 5617.728 + "value": 5813.152 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.467687386096356 + "value": 8.653902702957021 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 15964.0 + "value": 13716.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1868744.0 + "value": 1869772.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1374058573694177 + "value": 1.1220443013060788 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 108866.0 + "value": 105382.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106234.0 + "value": 2103924.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.8629439943995987 + "value": 1.862824775326483 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -35418,7 +35418,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.021828306865919127 + "value": 0.021825152536289465 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -35448,13 +35448,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.8672609653546 + "value": 73.8706463653676 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.98281451609408 + "value": 99.97302261151874 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -35514,7 +35514,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.34509806818282 + "value": 42.34317758635363 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -35526,13 +35526,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.808037629777782 + "value": 29.806649318460437 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0588548523273116 + "value": 1.058805536092772 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -35544,7 +35544,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.44913969231415 + "value": 96.44464756727571 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -35563,19 +35563,19 @@ "time" ], "times": { - "compilation_time": 23380.559, - "data": 77164.129, - "framework": 3331831.443, - "kernel_overhead": 1575300.664, - "profiling_overhead": 64023.778, - "profiling_runs": 1615342.872, + "compilation_time": 14190.299, + "data": 57768.658, + "framework": 3306099.486, + "kernel_overhead": 1580555.949, + "profiling_overhead": 47517.934, + "profiling_runs": 1620256.945, "runtimes": [ - 5617.728 + 5813.152 ], - "search_algorithm": 46.751, - "validation": 30.198 + "search_algorithm": 25.745, + "validation": 15.061 }, - "timestamp": "2026-03-05 08:59:51 UTC" + "timestamp": "2026-03-13 09:41:14 UTC" }, { "compilation_data": { @@ -35596,14 +35596,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -35611,49 +35611,49 @@ { "name": "time", "unit": "", - "value": 3368.0 + "value": 3381.28 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.185887750830652 + "value": 14.604445100236084 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7208.0 + "value": 544.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870148.0 + "value": 1868152.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8640471010703108 + "value": 1.859550594773616 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 60682.0 + "value": 54367.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100454.0 + "value": 2099626.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.2271739728338815 + "value": 6.226580988055755 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -35665,7 +35665,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07294943692153376 + "value": 0.07294534892666808 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -35695,13 +35695,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.68448483285528 + "value": 98.67722484740781 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96018313852841 + "value": 99.9631106709405 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -35761,7 +35761,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.77189124935575 + "value": 42.76816609754263 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -35773,13 +35773,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 49.81998569312231 + "value": 49.81573489097275 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.344847672267733 + "value": 3.3445622790570084 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -35791,7 +35791,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.86161422140057 + "value": 89.85397162001281 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -35810,19 +35810,19 @@ "time" ], "times": { - "compilation_time": 23737.275, - "data": 76243.924, - "framework": 1737459.703, - "kernel_overhead": 784567.889, - "profiling_overhead": 63235.061, - "profiling_runs": 813412.829, + "compilation_time": 14398.826, + "data": 57768.798, + "framework": 1710377.773, + "kernel_overhead": 788286.29, + "profiling_overhead": 47603.974, + "profiling_runs": 816718.711, "runtimes": [ - 3368.0 + 3381.28 ], - "search_algorithm": 47.201, - "validation": 26.52 + "search_algorithm": 31.619, + "validation": 15.236 }, - "timestamp": "2026-03-05 08:59:52 UTC" + "timestamp": "2026-03-13 09:41:15 UTC" }, { "compilation_data": { @@ -35843,14 +35843,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -35858,49 +35858,49 @@ { "name": "time", "unit": "", - "value": 6283.712 + "value": 6325.632 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.29594977525894 + "value": 7.8109824754261075 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 17540.0 + "value": 4732.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1870668.0 + "value": 1868844.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0246750835882101 + "value": 1.0169308404857895 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 120321.0 + "value": 105846.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106523.0 + "value": 2102077.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6607121937831946 + "value": 1.6605954408216101 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -35912,7 +35912,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019457398889693174 + "value": 0.019455918901244713 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -35942,13 +35942,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.99982124551232 + "value": 81.99701389020532 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97432925336001 + "value": 99.96841609519392 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -36008,7 +36008,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.24541028159244 + "value": 43.2446669083844 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -36020,13 +36020,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.5726566834338 + "value": 26.572207147109665 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9439261590428756 + "value": 0.9439101904063616 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -36038,7 +36038,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.27779125807814 + "value": 94.2761963369442 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -36057,19 +36057,19 @@ "time" ], "times": { - "compilation_time": 24185.16, - "data": 78146.607, - "framework": 3689080.967, - "kernel_overhead": 1751505.842, - "profiling_overhead": 64634.139, - "profiling_runs": 1794794.379, + "compilation_time": 14989.173, + "data": 59245.283, + "framework": 3669195.885, + "kernel_overhead": 1758996.722, + "profiling_overhead": 48972.721, + "profiling_runs": 1801981.159, "runtimes": [ - 6283.712 + 6325.632 ], - "search_algorithm": 46.915, - "validation": 29.54 + "search_algorithm": 25.919, + "validation": 16.913 }, - "timestamp": "2026-03-05 08:59:54 UTC" + "timestamp": "2026-03-13 09:41:17 UTC" }, { "compilation_data": { @@ -36090,14 +36090,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "16", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 16, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 16, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -36105,49 +36105,49 @@ { "name": "time", "unit": "", - "value": 5985.952 + "value": 6000.64 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.797281811514486 + "value": 8.188328565350293 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 13160.0 + "value": 2880.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1871268.0 + "value": 1869268.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0721199205383505 + "value": 1.0628362318840578 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 111822.0 + "value": 99480.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109269.0 + "value": 2100167.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7422597444152297 + "value": 1.742217922090571 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -36159,7 +36159,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02041221036895768 + "value": 0.020411647515963294 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -36189,13 +36189,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.98707983621536 + "value": 81.9846673466584 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96936569587847 + "value": 99.97362415811504 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -36255,7 +36255,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.06313842560343 + "value": 44.06007854343413 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -36267,13 +36267,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.878011458563474 + "value": 27.87605528606123 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9902955730519982 + "value": 0.9902260849907004 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -36285,7 +36285,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.98874312246363 + "value": 94.98207744088447 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -36304,19 +36304,19 @@ "time" ], "times": { - "compilation_time": 24149.084, - "data": 79213.128, - "framework": 3610749.3710000003, - "kernel_overhead": 1711734.6, - "profiling_overhead": 66028.534, - "profiling_runs": 1753773.109, + "compilation_time": 14804.353, + "data": 57760.812, + "framework": 3579066.4639999997, + "kernel_overhead": 1716305.222, + "profiling_overhead": 47557.957, + "profiling_runs": 1757442.473, "runtimes": [ - 5985.952 + 6000.64 ], - "search_algorithm": 48.258, - "validation": 31.185 + "search_algorithm": 25.253, + "validation": 16.634 }, - "timestamp": "2026-03-05 08:59:56 UTC" + "timestamp": "2026-03-13 09:41:19 UTC" }, { "compilation_data": { @@ -36337,14 +36337,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 1 }, "correctness": 1, "invalidity": "correct", @@ -36352,49 +36352,49 @@ { "name": "time", "unit": "", - "value": 6976.736 + "value": 7055.296 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.919748393478073 + "value": 6.888008002008792 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 14832.0 + "value": 17036.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1843604.0 + "value": 1843912.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9290804387164939 + "value": 0.9238768326387793 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 133282.0 + "value": 133095.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2114657.0 + "value": 2111631.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.98223235977743 + "value": 47.98659024177635 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -36406,7 +36406,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5622404444389919 + "value": 0.5621688081920045 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -36436,13 +36436,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.30649118490541 + "value": 65.22088961988341 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95456084202118 + "value": 99.94550797934127 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -36502,7 +36502,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.375209390200993 + "value": 21.374225298268623 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -36514,13 +36514,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.999830950497238 + "value": 23.99894666049764 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.374828309098753 + "value": 24.373930202067918 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -36532,7 +36532,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.20313458261315 + "value": 47.2014413043658 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -36551,19 +36551,19 @@ "time" ], "times": { - "compilation_time": 24158.466, - "data": 77772.106, - "framework": 541980.463, - "kernel_overhead": 175712.419, - "profiling_overhead": 64563.643, - "profiling_runs": 223932.295, + "compilation_time": 14686.102, + "data": 59040.489, + "framework": 504182.887, + "kernel_overhead": 174269.622, + "profiling_overhead": 48829.997, + "profiling_runs": 222042.779, "runtimes": [ - 6976.736 + 7055.296 ], - "search_algorithm": 53.542, - "validation": 33.108 + "search_algorithm": 22.682, + "validation": 15.559 }, - "timestamp": "2026-03-05 08:59:56 UTC" + "timestamp": "2026-03-13 09:41:19 UTC" }, { "compilation_data": { @@ -36584,14 +36584,14 @@ "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -36599,49 +36599,49 @@ { "name": "time", "unit": "", - "value": 3624.288 + "value": 4046.208 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.236768693607218 + "value": 13.075101565192156 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2452.0 + "value": 6940.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836372.0 + "value": 1838620.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.7103301266846866 + "value": 1.70131183578719 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 62647.0 + "value": 68565.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099170.0 + "value": 2100734.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.69463555031057 + "value": 45.69506274431203 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -36653,7 +36653,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5353009826484592 + "value": 0.5352690954516394 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -36683,13 +36683,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.47143389281624 + "value": 75.37722329320206 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91055038028239 + "value": 99.90595873466445 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -36749,7 +36749,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.97708426824793 + "value": 28.977331254604366 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -36761,13 +36761,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.71991347474022 + "value": 45.71929114510185 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.306440267396873 + "value": 23.306123025139808 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -36779,7 +36779,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.882484923107704 + "value": 56.88180160056062 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -36798,19 +36798,19 @@ "time" ], "times": { - "compilation_time": 22922.368, - "data": 79448.923, - "framework": 359343.609, - "kernel_overhead": 92599.387, - "profiling_overhead": 64437.211, - "profiling_runs": 122858.088, + "compilation_time": 14546.306, + "data": 57598.973, + "framework": 318209.226, + "kernel_overhead": 91205.528, + "profiling_overhead": 47398.963, + "profiling_runs": 122005.762, "runtimes": [ - 3624.288 + 4046.208 ], - "search_algorithm": 48.938, - "validation": 34.116 + "search_algorithm": 27.392, + "validation": 18.064 }, - "timestamp": "2026-03-05 08:59:56 UTC" + "timestamp": "2026-03-13 09:41:19 UTC" }, { "compilation_data": { @@ -36831,14 +36831,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -36846,49 +36846,49 @@ { "name": "time", "unit": "", - "value": 2228.256 + "value": 2245.568 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.53126361224409 + "value": 21.720855412566237 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6200.0 + "value": 504.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838280.0 + "value": 1835864.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.7988896410853266 + "value": 2.7932784664881956 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 43516.0 + "value": 37807.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100136.0 + "value": 2099487.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.92711637824371 + "value": 37.92586076828753 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -36900,7 +36900,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.4442566460665906 + "value": 0.4442042515349954 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -36930,13 +36930,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.88531299371621 + "value": 96.14097415204756 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91894651512241 + "value": 99.92453890266584 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -36996,7 +36996,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.238934598148695 + "value": 43.23092087172374 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -37008,13 +37008,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.88130537107199 + "value": 75.86810983684879 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.489046203703058 + "value": 19.48565711630003 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -37026,7 +37026,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.54445266367635 + "value": 64.533377238322 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -37045,19 +37045,19 @@ "time" ], "times": { - "compilation_time": 24044.82, - "data": 77904.741, - "framework": 232560.98599999998, - "kernel_overhead": 33602.637, - "profiling_overhead": 64140.925, - "profiling_runs": 56912.683, + "compilation_time": 15051.729, + "data": 57536.988, + "framework": 191903.772, + "kernel_overhead": 32064.379, + "profiling_overhead": 47284.411, + "profiling_runs": 55017.994, "runtimes": [ - 2228.256 + 2245.568 ], - "search_algorithm": 46.617, - "validation": 26.696 + "search_algorithm": 35.413, + "validation": 15.323 }, - "timestamp": "2026-03-05 08:59:56 UTC" + "timestamp": "2026-03-13 09:41:20 UTC" }, { "compilation_data": { @@ -37078,14 +37078,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -37093,49 +37093,49 @@ { "name": "time", "unit": "", - "value": 1990.304 + "value": 2088.448 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 32.142936969155116 + "value": 22.911341409202045 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2712.0 + "value": 6112.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838024.0 + "value": 1841048.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.1677998854257843 + "value": 3.1503082026330875 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 35187.0 + "value": 41581.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099436.0 + "value": 2101148.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.537040691324844 + "value": 21.532962939138056 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -37147,7 +37147,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.25222391892708773 + "value": 0.252187037077323 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -37177,13 +37177,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.95824959494315 + "value": 97.84719307509752 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91193593301041 + "value": 99.920498257184 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -37243,7 +37243,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.63054587225425 + "value": 43.62083560969914 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -37255,13 +37255,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.16831431582216 + "value": 86.14833141394058 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.233857383947518 + "value": 11.231252191172917 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -37273,7 +37273,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 61.72406449131474 + "value": 61.709917226560606 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -37292,19 +37292,19 @@ "time" ], "times": { - "compilation_time": 23822.036, - "data": 77405.131, - "framework": 229063.786, - "kernel_overhead": 32661.987, - "profiling_overhead": 64386.502, - "profiling_runs": 54610.166, + "compilation_time": 15032.414, + "data": 57421.193, + "framework": 186134.418, + "kernel_overhead": 29616.632, + "profiling_overhead": 47323.853, + "profiling_runs": 51772.74, "runtimes": [ - 1990.304 + 2088.448 ], - "search_algorithm": 42.243, - "validation": 23.217 + "search_algorithm": 25.508, + "validation": 16.966 }, - "timestamp": "2026-03-05 08:59:56 UTC" + "timestamp": "2026-03-13 09:41:20 UTC" }, { "compilation_data": { @@ -37325,14 +37325,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -37340,49 +37340,49 @@ { "name": "time", "unit": "", - "value": 1860.704 + "value": 1845.792 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 34.33315999617463 + "value": 26.025491018288065 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 716.0 + "value": 7108.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837388.0 + "value": 1839944.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.360913602977359 + "value": 3.3950844452034126 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 31476.0 + "value": 39083.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099174.0 + "value": 2104419.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.452660450103078 + "value": 11.450248651536791 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -37394,7 +37394,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.13411339130070343 + "value": 0.13409483368224762 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -37424,13 +37424,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.25139844852092 + "value": 98.27875871475187 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90575165873643 + "value": 99.91699690211252 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -37490,7 +37490,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.78870097511392 + "value": 37.77836471462889 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -37502,13 +37502,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.64111202261034 + "value": 91.61811900408068 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.152662550346154 + "value": 6.151118829619675 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -37520,7 +37520,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.50624182160324 + "value": 66.48973405762794 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -37539,19 +37539,19 @@ "time" ], "times": { - "compilation_time": 25976.204, - "data": 76949.149, - "framework": 234517.464, - "kernel_overhead": 36203.128, - "profiling_overhead": 63837.546, - "profiling_runs": 57527.641, + "compilation_time": 15304.788, + "data": 56966.953, + "framework": 192340.883, + "kernel_overhead": 33616.358, + "profiling_overhead": 47170.566, + "profiling_runs": 54587.006, "runtimes": [ - 1860.704 + 1845.792 ], - "search_algorithm": 30.616, - "validation": 24.73 + "search_algorithm": 30.021, + "validation": 14.905 }, - "timestamp": "2026-03-05 08:59:57 UTC" + "timestamp": "2026-03-13 09:41:20 UTC" }, { "compilation_data": { @@ -37572,14 +37572,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -37587,49 +37587,49 @@ { "name": "time", "unit": "", - "value": 1801.632 + "value": 1815.296 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.53418433042789 + "value": 26.998551978336096 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 356.0 + "value": 224.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836460.0 + "value": 1837440.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4790664465648664 + "value": 3.4912180035355482 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 29535.0 + "value": 29534.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098979.0 + "value": 2098995.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.931460255308655 + "value": 5.930339230548586 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -37641,7 +37641,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06945531724880524 + "value": 0.06941621460637369 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -37671,13 +37671,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.69100383089314 + "value": 81.69454514104886 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90346598311662 + "value": 99.86655823019383 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -37737,7 +37737,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.64519713262409 + "value": 37.63854472271837 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -37749,13 +37749,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.92129115226228 + "value": 94.90291180100705 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3718378570933014 + "value": 3.3711849773062803 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -37767,7 +37767,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.68388362376331 + "value": 58.67266577486683 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -37786,19 +37786,19 @@ "time" ], "times": { - "compilation_time": 23962.465, - "data": 78362.365, - "framework": 211194.99, - "kernel_overhead": 23267.102, - "profiling_overhead": 65058.9, - "profiling_runs": 44506.623, + "compilation_time": 14822.072, + "data": 58469.61, + "framework": 169397.222, + "kernel_overhead": 20870.886, + "profiling_overhead": 48516.819, + "profiling_runs": 41539.907, "runtimes": [ - 1801.632 + 1815.296 ], - "search_algorithm": 32.276, - "validation": 24.111 + "search_algorithm": 26.274, + "validation": 15.603 }, - "timestamp": "2026-03-05 08:59:57 UTC" + "timestamp": "2026-03-13 09:41:20 UTC" }, { "compilation_data": { @@ -37819,14 +37819,14 @@ "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -37834,49 +37834,49 @@ { "name": "time", "unit": "", - "value": 6219.232 + "value": 6406.208 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.545415038739156 + "value": 7.142914835874903 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6404.0 + "value": 488.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838656.0 + "value": 1838960.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9987229231401331 + "value": 0.9928045163900729 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 109461.0 + "value": 104956.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100518.0 + "value": 2099180.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.286276846293894 + "value": 26.284801479237004 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -37888,7 +37888,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.30797285788686707 + "value": 0.3079633738367533 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -37918,13 +37918,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.96946830588224 + "value": 97.94656402106256 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97092038407884 + "value": 99.97406804457385 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -37984,7 +37984,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.02645845501432 + "value": 43.023977601544544 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -37996,13 +37996,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.287994987655104 + "value": 26.286357800657633 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.400716194878873 + "value": 13.399881613225864 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -38014,7 +38014,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.91682014027153 + "value": 79.91189451806795 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -38033,19 +38033,19 @@ "time" ], "times": { - "compilation_time": 26164.951, - "data": 79103.581, - "framework": 2196984.139, - "kernel_overhead": 1003776.618, - "profiling_overhead": 65820.221, - "profiling_runs": 1048283.719, + "compilation_time": 15390.895, + "data": 58967.596, + "framework": 2156706.778, + "kernel_overhead": 1002240.735, + "profiling_overhead": 48337.885, + "profiling_runs": 1047160.562, "runtimes": [ - 6219.232 + 6406.208 ], - "search_algorithm": 55.983, - "validation": 29.226 + "search_algorithm": 30.12, + "validation": 15.675 }, - "timestamp": "2026-03-05 08:59:58 UTC" + "timestamp": "2026-03-13 09:41:21 UTC" }, { "compilation_data": { @@ -38066,14 +38066,14 @@ "registers": 25 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -38081,49 +38081,49 @@ { "name": "time", "unit": "", - "value": 6144.704 + "value": 6276.64 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.058490005591278 + "value": 7.828358509837652 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 920.0 + "value": 20000.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841216.0 + "value": 1843976.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0147087164549977 + "value": 1.0333337798774174 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 102358.0 + "value": 122121.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099490.0 + "value": 2105276.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.384364571771066 + "value": 13.383591981725798 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -38135,7 +38135,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.15681604219944198 + "value": 0.15679773296611085 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -38165,13 +38165,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.73096051384066 + "value": 98.7814904321765 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97077186756191 + "value": 99.96615747172844 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -38231,7 +38231,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.235091304396015 + "value": 36.23264143584361 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -38243,13 +38243,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.771095893400947 + "value": 26.769205802257957 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.875779511684033 + "value": 6.875294068353361 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -38261,7 +38261,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.50313500484299 + "value": 83.49729147773967 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -38280,19 +38280,19 @@ "time" ], "times": { - "compilation_time": 23539.685, - "data": 78102.244, - "framework": 2775194.272, - "kernel_overhead": 1294130.703, - "profiling_overhead": 64893.993, - "profiling_runs": 1338067.332, + "compilation_time": 15493.793, + "data": 58680.208, + "framework": 2737684.8310000002, + "kernel_overhead": 1293572.354, + "profiling_overhead": 48603.459, + "profiling_runs": 1336828.81, "runtimes": [ - 6144.704 + 6276.64 ], - "search_algorithm": 46.736, - "validation": 29.396 + "search_algorithm": 25.352, + "validation": 17.037 }, - "timestamp": "2026-03-05 08:59:59 UTC" + "timestamp": "2026-03-13 09:41:23 UTC" }, { "compilation_data": { @@ -38313,14 +38313,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -38328,49 +38328,49 @@ { "name": "time", "unit": "", - "value": 8072.448 + "value": 8479.136 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.04897187303079 + "value": 5.953568349656736 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16484.0 + "value": 21788.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1912776.0 + "value": 1914080.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.6625117633078 + "value": 48.77726551308113 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 609694.0 + "value": 682799.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138419700.0 + "value": 138427026.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.08855867674404 + "value": 5.12656241605026 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -38382,7 +38382,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.059218682067700666 + "value": 0.0599462503420395 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -38412,13 +38412,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.33135533592075 + "value": 94.23877268587596 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.09977888294472 + "value": 99.5653249694866 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -38478,7 +38478,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.385534521328793 + "value": 12.351833362762246 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -38490,13 +38490,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.604847815094697 + "value": 20.550983443637254 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.089310062225682 + "value": 13.055092509849645 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -38508,7 +38508,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 32.27081945680216 + "value": 32.18651685810803 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -38527,19 +38527,19 @@ "time" ], "times": { - "compilation_time": 24440.348, - "data": 78734.352, - "framework": 961842.9010000001, - "kernel_overhead": 380984.278, - "profiling_overhead": 65779.525, - "profiling_runs": 436344.746, + "compilation_time": 14126.985, + "data": 58974.042, + "framework": 919233.074, + "kernel_overhead": 378167.332, + "profiling_overhead": 48827.317, + "profiling_runs": 433264.383, "runtimes": [ - 8072.448 + 8479.136 ], - "search_algorithm": 79.203, - "validation": 37.402 + "search_algorithm": 24.575, + "validation": 14.584 }, - "timestamp": "2026-03-05 09:00:0 UTC" + "timestamp": "2026-03-13 09:41:23 UTC" }, { "compilation_data": { @@ -38560,14 +38560,14 @@ "registers": 34 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -38575,49 +38575,49 @@ { "name": "time", "unit": "", - "value": 8130.784 + "value": 8451.072 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.900402362411498 + "value": 6.189792406575101 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2848.0 + "value": 12368.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1999612.0 + "value": 2004304.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 53.602665436616036 + "value": 53.65165240203037 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 14899102.0 + "value": 14880280.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138419214.0 + "value": 138424098.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.594141396113949 + "value": 2.57731265414941 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -38629,7 +38629,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029836958069677018 + "value": 0.029960495448286835 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -38659,13 +38659,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.24919211840647 + "value": 89.21683042164096 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.78342280171512 + "value": 98.74200114751038 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -38725,7 +38725,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.123598332366093 + "value": 10.17049792190932 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -38737,13 +38737,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.619549445846758 + "value": 20.71360851681453 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.794825281156971 + "value": 11.848629090550892 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -38755,7 +38755,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.160138261166452 + "value": 29.293215885462736 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -38774,19 +38774,19 @@ "time" ], "times": { - "compilation_time": 24218.91, - "data": 78312.064, - "framework": 924757.922, - "kernel_overhead": 363142.723, - "profiling_overhead": 65336.315, - "profiling_runs": 417966.82, + "compilation_time": 14679.663, + "data": 58793.581, + "framework": 884792.574, + "kernel_overhead": 361064.44, + "profiling_overhead": 48506.7, + "profiling_runs": 416427.853, "runtimes": [ - 8130.784 + 8451.072 ], - "search_algorithm": 48.697, - "validation": 27.214 + "search_algorithm": 24.12, + "validation": 13.994 }, - "timestamp": "2026-03-05 09:00:1 UTC" + "timestamp": "2026-03-13 09:41:24 UTC" }, { "compilation_data": { @@ -38807,14 +38807,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -38822,49 +38822,49 @@ { "name": "time", "unit": "", - "value": 9178.752 + "value": 9429.248 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.512745632239801 + "value": 5.9656712162143535 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 20220.0 + "value": 19036.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2194648.0 + "value": 2197040.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 73.27596037252229 + "value": 73.0614777410446 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 100265387.0 + "value": 100502254.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138418147.0 + "value": 138425449.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.129554155330695 + "value": 1.141309117724621 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -38876,7 +38876,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01326300229168878 + "value": 0.01299706969386697 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -38906,13 +38906,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.15328889802461 + "value": 89.4785903352049 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.68511493565813 + "value": 97.4832408878674 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -38972,7 +38972,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.91838810241922 + "value": 7.936586858167701 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -38984,13 +38984,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 18.16561995299617 + "value": 18.203469978090965 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.8167968178606 + "value": 9.837251171021569 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39002,7 +39002,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.309410651844388 + "value": 24.360116990699073 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -39021,19 +39021,19 @@ "time" ], "times": { - "compilation_time": 24709.323, - "data": 79571.877, - "framework": 915947.878, - "kernel_overhead": 354085.079, - "profiling_overhead": 65344.495, - "profiling_runs": 416946.427, + "compilation_time": 15226.934, + "data": 58512.256, + "framework": 868638.654, + "kernel_overhead": 349665.948, + "profiling_overhead": 48296.749, + "profiling_runs": 412163.701, "runtimes": [ - 9178.752 + 9429.248 ], - "search_algorithm": 45.035, - "validation": 31.079 + "search_algorithm": 25.432, + "validation": 15.772 }, - "timestamp": "2026-03-05 09:00:1 UTC" + "timestamp": "2026-03-13 09:41:24 UTC" }, { "compilation_data": { @@ -39054,14 +39054,14 @@ "registers": 26 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -39069,49 +39069,49 @@ { "name": "time", "unit": "", - "value": 3972.032 + "value": 4029.632 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.787015189095458 + "value": 12.070048927456444 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3524.0 + "value": 3028.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837732.0 + "value": 1837892.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5694314330587167 + "value": 1.5711473659317747 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 68983.0 + "value": 69090.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099750.0 + "value": 2100102.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.895878747324076 + "value": 20.8945489218635 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -39123,7 +39123,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2448179161767602 + "value": 0.2447844482989717 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -39153,13 +39153,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.74736528742896 + "value": 98.64304255796716 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96466892625487 + "value": 99.96224141866213 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -39219,7 +39219,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.61355797678639 + "value": 39.608644392329126 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -39231,13 +39231,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.79702503190437 + "value": 41.79232603244245 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.734978108780124 + "value": 10.73377123684801 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39249,7 +39249,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.34973387772632 + "value": 83.34044399253526 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -39268,19 +39268,19 @@ "time" ], "times": { - "compilation_time": 23975.864, - "data": 78273.726, - "framework": 1414019.4139999999, - "kernel_overhead": 619194.085, - "profiling_overhead": 65136.846, - "profiling_runs": 651414.757, + "compilation_time": 15021.459, + "data": 58517.358, + "framework": 1376262.369, + "kernel_overhead": 618734.556, + "profiling_overhead": 48269.183, + "profiling_runs": 650741.272, "runtimes": [ - 3972.032 + 4029.632 ], - "search_algorithm": 48.854, - "validation": 25.401 + "search_algorithm": 24.141, + "validation": 15.923 }, - "timestamp": "2026-03-05 09:00:2 UTC" + "timestamp": "2026-03-13 09:41:25 UTC" }, { "compilation_data": { @@ -39301,14 +39301,14 @@ "registers": 30 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -39316,49 +39316,49 @@ { "name": "time", "unit": "", - "value": 4215.552 + "value": 4286.048 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.847682196047693 + "value": 11.316152693430082 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4924.0 + "value": 592.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837728.0 + "value": 1835920.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.4790217160700008 + "value": 1.476767000171609 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 74544.0 + "value": 70665.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100249.0 + "value": 2098998.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.81892512849066 + "value": 9.81806999980185 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -39370,7 +39370,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11503605385143993 + "value": 0.11502274455894351 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -39400,13 +39400,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.92691866663995 + "value": 98.87989960758149 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9678956565511 + "value": 99.96392829326905 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -39466,7 +39466,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.26739049728806 + "value": 43.26459964327525 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -39478,13 +39478,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.2782497388217 + "value": 39.275264067524965 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.120748379035837 + "value": 5.120359133803303 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39496,7 +39496,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.93688106975301 + "value": 90.93004663706377 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -39515,19 +39515,19 @@ "time" ], "times": { - "compilation_time": 23870.534, - "data": 77324.653, - "framework": 1917854.088, - "kernel_overhead": 871462.818, - "profiling_overhead": 64274.521, - "profiling_runs": 904792.096, + "compilation_time": 14719.263, + "data": 58701.633, + "framework": 1886054.281, + "kernel_overhead": 873075.591, + "profiling_overhead": 48396.837, + "profiling_runs": 905880.22, "runtimes": [ - 4215.552 + 4286.048 ], - "search_algorithm": 57.625, - "validation": 27.879 + "search_algorithm": 25.972, + "validation": 15.853 }, - "timestamp": "2026-03-05 09:00:3 UTC" + "timestamp": "2026-03-13 09:41:26 UTC" }, { "compilation_data": { @@ -39548,14 +39548,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -39563,49 +39563,49 @@ { "name": "time", "unit": "", - "value": 6903.776 + "value": 6944.48 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.184425418034566 + "value": 7.030240466358999 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 832.0 + "value": 13220.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839768.0 + "value": 1839164.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9348135401804258 + "value": 0.9341695118239 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 115263.0 + "value": 124925.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2103382.0 + "value": 2102056.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.0412437887061583 + "value": 3.0410618287599966 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -39617,7 +39617,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.035632601777576184 + "value": 0.03563169768917074 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -39647,13 +39647,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.85410824412926 + "value": 98.84785016119069 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97656318394232 + "value": 99.97963457675819 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -39713,7 +39713,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.47847535620343 + "value": 36.4764868523816 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -39725,13 +39725,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.330891866530628 + "value": 24.329527100307267 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6335437654531062 + "value": 1.6334521368614499 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39743,7 +39743,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.97997947019509 + "value": 87.9750908192645 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -39762,19 +39762,19 @@ "time" ], "times": { - "compilation_time": 24408.376, - "data": 76201.076, - "framework": 3152492.208, - "kernel_overhead": 1483563.673, - "profiling_overhead": 63213.95, - "profiling_runs": 1529513.509, + "compilation_time": 15109.783, + "data": 59832.545, + "framework": 3116222.4579999996, + "kernel_overhead": 1480422.893, + "profiling_overhead": 49750.324, + "profiling_runs": 1526216.696, "runtimes": [ - 6903.776 + 6944.48 ], - "search_algorithm": 50.44, - "validation": 31.183 + "search_algorithm": 36.836, + "validation": 17.855 }, - "timestamp": "2026-03-05 09:00:5 UTC" + "timestamp": "2026-03-13 09:41:28 UTC" }, { "compilation_data": { @@ -39795,14 +39795,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -39810,49 +39810,49 @@ { "name": "time", "unit": "", - "value": 9419.264 + "value": 9658.688 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.512734828942912 + "value": 5.647078859972619 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 14496.0 + "value": 7756.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2199888.0 + "value": 2199220.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.7199263060559 + "value": 81.6655203211476 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 133004985.0 + "value": 133070327.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138426999.0 + "value": 138424154.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1129817407828222 + "value": 1.1182091927382722 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -39864,7 +39864,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01287214272843041 + "value": 0.012919427999895282 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -39894,13 +39894,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.86011929677683 + "value": 98.8982202127256 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.46560880709742 + "value": 98.52452539921906 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -39960,7 +39960,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.778050797872386 + "value": 7.802397388600814 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -39972,13 +39972,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.848633397454325 + "value": 17.903487100681676 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.183337262273418 + "value": 5.199267066958217 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -39990,7 +39990,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.191973958176984 + "value": 17.24486225800338 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40009,19 +40009,19 @@ "time" ], "times": { - "compilation_time": 26344.782, - "data": 77531.561, - "framework": 589961.96, - "kernel_overhead": 192458.676, - "profiling_overhead": 64832.627, - "profiling_runs": 255139.096, + "compilation_time": 14179.867, + "data": 58929.24, + "framework": 547677.12, + "kernel_overhead": 188348.898, + "profiling_overhead": 48748.344, + "profiling_runs": 251650.638, "runtimes": [ - 9419.264 + 9658.688 ], - "search_algorithm": 33.08, - "validation": 29.682 + "search_algorithm": 25.84, + "validation": 15.836 }, - "timestamp": "2026-03-05 09:00:5 UTC" + "timestamp": "2026-03-13 09:41:28 UTC" }, { "compilation_data": { @@ -40042,14 +40042,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -40057,49 +40057,49 @@ { "name": "time", "unit": "", - "value": 2839.04 + "value": 2836.576 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 22.412756416224436 + "value": 17.137482991979375 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 8040.0 + "value": 476.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840808.0 + "value": 1837408.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.210598843561634 + "value": 2.211686907620278 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 56077.0 + "value": 48048.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104252.0 + "value": 2099180.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.86028684687657 + "value": 14.85838960015036 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -40111,7 +40111,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.17407918690513116 + "value": 0.17406386137898583 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -40141,13 +40141,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.80352031798236 + "value": 98.80180675968877 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95630799053643 + "value": 99.95840720269969 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -40207,7 +40207,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.947575104895826 + "value": 45.942716146283 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -40219,13 +40219,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.445001846116995 + "value": 59.438520160569176 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.749909908649041 + "value": 7.749064884214829 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -40237,7 +40237,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.39897776073991 + "value": 88.38944863557491 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40256,19 +40256,19 @@ "time" ], "times": { - "compilation_time": 23286.028, - "data": 77528.53, - "framework": 1197747.8220000002, - "kernel_overhead": 514690.22, - "profiling_overhead": 64431.905, - "profiling_runs": 541097.167, + "compilation_time": 15016.712, + "data": 57524.719, + "framework": 1149413.441, + "kernel_overhead": 509331.702, + "profiling_overhead": 47367.496, + "profiling_runs": 535189.524, "runtimes": [ - 2839.04 + 2836.576 ], - "search_algorithm": 40.351, - "validation": 31.443 + "search_algorithm": 38.789, + "validation": 18.614 }, - "timestamp": "2026-03-05 09:00:6 UTC" + "timestamp": "2026-03-13 09:41:28 UTC" }, { "compilation_data": { @@ -40289,14 +40289,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -40304,49 +40304,49 @@ { "name": "time", "unit": "", - "value": 3433.408 + "value": 3615.2 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.214500887874173 + "value": 13.71812342761302 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 824.0 + "value": 4672.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837296.0 + "value": 1837920.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8054145829631547 + "value": 1.7799336295098027 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 58626.0 + "value": 62774.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099900.0 + "value": 2100248.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.037865353269417 + "value": 6.037291361586821 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -40358,7 +40358,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07073559841521865 + "value": 0.07073072064059192 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -40388,13 +40388,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.70261585520078 + "value": 98.70057908323197 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9681423929816 + "value": 99.96646309471538 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -40454,7 +40454,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.224851823466636 + "value": 42.22282983642167 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -40466,13 +40466,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.30422375456628 + "value": 48.30170418742828 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.243081428834406 + "value": 3.2429122684430607 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -40484,7 +40484,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.41076730026462 + "value": 92.40603906687596 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40503,19 +40503,19 @@ "time" ], "times": { - "compilation_time": 24178.548, - "data": 77800.833, - "framework": 1883855.96, - "kernel_overhead": 856205.541, - "profiling_overhead": 64133.812, - "profiling_runs": 885715.774, + "compilation_time": 14414.732, + "data": 57929.212, + "framework": 1831595.019, + "kernel_overhead": 848653.79, + "profiling_overhead": 47127.48, + "profiling_runs": 877884.537, "runtimes": [ - 3433.408 + 3615.2 ], - "search_algorithm": 37.431, - "validation": 27.653 + "search_algorithm": 26.839, + "validation": 16.371 }, - "timestamp": "2026-03-05 09:00:7 UTC" + "timestamp": "2026-03-13 09:41:29 UTC" }, { "compilation_data": { @@ -40536,14 +40536,14 @@ "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -40551,49 +40551,49 @@ { "name": "time", "unit": "", - "value": 5580.448 + "value": 5674.592 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.346890462545408 + "value": 8.54729196919561 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16120.0 + "value": 3984.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839004.0 + "value": 1834332.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.143832056107912 + "value": 1.127958455212252 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 109664.0 + "value": 94998.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106172.0 + "value": 2099701.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.863003721371147 + "value": 1.8628873657125555 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -40605,7 +40605,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02182763761900974 + "value": 0.021826038727080128 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -40635,13 +40635,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.90623529157122 + "value": 73.90327559477647 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97657476866488 + "value": 99.97480138713286 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -40701,7 +40701,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.34648197185445 + "value": 42.34411472303697 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -40713,13 +40713,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.80898405262761 + "value": 29.807329242208514 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0588884715960247 + "value": 1.0588296886575534 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -40731,7 +40731,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.45216991700806 + "value": 96.44684757994924 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40750,19 +40750,19 @@ "time" ], "times": { - "compilation_time": 24822.534, - "data": 78017.84, - "framework": 3358895.26, - "kernel_overhead": 1587971.169, - "profiling_overhead": 65032.283, - "profiling_runs": 1627873.968, + "compilation_time": 15029.206, + "data": 58526.07, + "framework": 3300689.613, + "kernel_overhead": 1577171.897, + "profiling_overhead": 48422.668, + "profiling_runs": 1616568.978, "runtimes": [ - 5580.448 + 5674.592 ], - "search_algorithm": 71.487, - "validation": 28.682 + "search_algorithm": 26.4, + "validation": 16.421 }, - "timestamp": "2026-03-05 09:00:8 UTC" + "timestamp": "2026-03-13 09:41:31 UTC" }, { "compilation_data": { @@ -40783,14 +40783,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -40798,49 +40798,49 @@ { "name": "time", "unit": "", - "value": 3341.024 + "value": 3466.592 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.906208881578948 + "value": 14.106891281881289 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1368.0 + "value": 10952.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837828.0 + "value": 1841100.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.858903657239731 + "value": 1.8598817527805969 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 56261.0 + "value": 68734.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099284.0 + "value": 2105214.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.227236841665717 + "value": 6.226870093893029 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -40852,7 +40852,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07295546581779942 + "value": 0.07293362013751382 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -40882,13 +40882,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.71694853486564 + "value": 98.72792562083042 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96730626043154 + "value": 99.94187638445823 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -40948,7 +40948,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.77234941685757 + "value": 42.7697103905528 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -40960,13 +40960,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 49.8205528667569 + "value": 49.8183075487585 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.344885751552282 + "value": 3.3447350038839323 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -40978,7 +40978,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.86253127202497 + "value": 89.85857932814986 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -40997,19 +40997,19 @@ "time" ], "times": { - "compilation_time": 26133.776, - "data": 78921.26, - "framework": 1756314.173, - "kernel_overhead": 791363.282, - "profiling_overhead": 65892.935, - "profiling_runs": 820136.696, + "compilation_time": 14099.974, + "data": 57388.796, + "framework": 1705241.5129999998, + "kernel_overhead": 785917.847, + "profiling_overhead": 47302.62, + "profiling_runs": 814632.25, "runtimes": [ - 3341.024 + 3466.592 ], - "search_algorithm": 35.939, - "validation": 26.307 + "search_algorithm": 25.746, + "validation": 15.669 }, - "timestamp": "2026-03-05 09:00:9 UTC" + "timestamp": "2026-03-13 09:41:32 UTC" }, { "compilation_data": { @@ -41030,14 +41030,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -41045,49 +41045,49 @@ { "name": "time", "unit": "", - "value": 6329.568 + "value": 6361.76 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.156435205198644 + "value": 7.638054406069288 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 13184.0 + "value": 6340.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1851336.0 + "value": 1836472.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0421575256306586 + "value": 1.012159186207411 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 118511.0 + "value": 108586.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2144717.0 + "value": 2100872.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6607503194129056 + "value": 1.6606308719699006 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -41099,7 +41099,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019450315839699004 + "value": 0.019456758683876946 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -41129,13 +41129,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.03408434095341 + "value": 82.03142035818168 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93390822091152 + "value": 99.97402509652693 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -41195,7 +41195,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.24724323448953 + "value": 43.244345527063615 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -41207,13 +41207,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.57372760914939 + "value": 26.571863205538627 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9439642009597744 + "value": 0.9438979727553395 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -41225,7 +41225,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.28155122418902 + "value": 94.27497605806782 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -41244,19 +41244,19 @@ "time" ], "times": { - "compilation_time": 24166.253, - "data": 78628.2, - "framework": 3716924.085, - "kernel_overhead": 1764615.651, - "profiling_overhead": 65431.499, - "profiling_runs": 1808248.735, + "compilation_time": 14438.902, + "data": 57913.447, + "framework": 3664526.314, + "kernel_overhead": 1757997.859, + "profiling_overhead": 47708.651, + "profiling_runs": 1800906.357, "runtimes": [ - 6329.568 + 6361.76 ], - "search_algorithm": 35.864, - "validation": 29.999 + "search_algorithm": 25.55, + "validation": 14.45 }, - "timestamp": "2026-03-05 09:00:11 UTC" + "timestamp": "2026-03-13 09:41:34 UTC" }, { "compilation_data": { @@ -41277,14 +41277,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "2", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 16, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 2, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -41292,49 +41292,49 @@ { "name": "time", "unit": "", - "value": 5938.336 + "value": 6265.056 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.62469801900467 + "value": 7.498591184524243 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16792.0 + "value": 5608.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840888.0 + "value": 1838148.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0741959221541613 + "value": 1.0484108100397298 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 118000.0 + "value": 104776.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2107124.0 + "value": 2100980.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.74226574401011 + "value": 1.7422229010057622 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -41346,7 +41346,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.020412961113973564 + "value": 0.020412346678990268 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -41376,13 +41376,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 82.02649049128317 + "value": 82.02418695483709 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97233741013348 + "value": 99.97476499139685 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -41442,7 +41442,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.06317023909161 + "value": 44.06080943256221 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -41454,13 +41454,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.87820807530725 + "value": 27.876692018010395 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9903025573625988 + "value": 0.9902487032764924 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -41472,7 +41472,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.98937109747226 + "value": 94.9842482237091 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -41491,19 +41491,19 @@ "time" ], "times": { - "compilation_time": 24181.83, - "data": 79218.936, - "framework": 3631007.884, - "kernel_overhead": 1722633.797, - "profiling_overhead": 64593.292, - "profiling_runs": 1764561.859, + "compilation_time": 15693.446, + "data": 58947.638, + "framework": 3575360.8310000002, + "kernel_overhead": 1712752.872, + "profiling_overhead": 48386.604, + "profiling_runs": 1755273.717, "runtimes": [ - 5938.336 + 6265.056 ], - "search_algorithm": 45.797, - "validation": 34.889 + "search_algorithm": 26.449, + "validation": 17.386 }, - "timestamp": "2026-03-05 09:00:13 UTC" + "timestamp": "2026-03-13 09:41:36 UTC" }, { "compilation_data": { @@ -41524,14 +41524,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 1 }, "correctness": 1, "invalidity": "correct", @@ -41539,49 +41539,49 @@ { "name": "time", "unit": "", - "value": 6877.376 + "value": 6994.592 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.237515992956766 + "value": 6.700676181312808 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 21168.0 + "value": 5452.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1845632.0 + "value": 1839792.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9344360220264809 + "value": 0.9210978102341923 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 139642.0 + "value": 125716.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109478.0 + "value": 2107871.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.014922984451935 + "value": 48.01319231114698 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -41593,7 +41593,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5624578441982503 + "value": 0.5624822550649666 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -41623,13 +41623,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.48798521642885 + "value": 81.10450881377695 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93532409227332 + "value": 99.93720279558327 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -41689,7 +41689,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.38788905568714 + "value": 21.386473655179945 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -41701,13 +41701,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.013732451901003 + "value": 24.01432320640513 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.388947021461956 + "value": 24.38954700650521 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -41719,7 +41719,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.23052863273042 + "value": 47.2317023453156 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -41738,19 +41738,19 @@ "time" ], "times": { - "compilation_time": 26406.627, - "data": 82304.778, - "framework": 552693.309, - "kernel_overhead": 177725.813, - "profiling_overhead": 67007.363, - "profiling_runs": 225655.355, + "compilation_time": 14035.796, + "data": 58984.757, + "framework": 501385.99299999996, + "kernel_overhead": 172882.946, + "profiling_overhead": 48854.266, + "profiling_runs": 220664.024, "runtimes": [ - 6877.376 + 6994.592 ], - "search_algorithm": 34.745, - "validation": 29.34 + "search_algorithm": 23.968, + "validation": 13.525 }, - "timestamp": "2026-03-05 09:00:14 UTC" + "timestamp": "2026-03-13 09:41:36 UTC" }, { "compilation_data": { @@ -41771,14 +41771,14 @@ "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -41786,49 +41786,49 @@ { "name": "time", "unit": "", - "value": 3620.736 + "value": 3755.936 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.24837126215924 + "value": 13.033427242542903 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 1256.0 + "value": 6904.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837172.0 + "value": 1842116.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.7112299626130505 + "value": 1.6922021141170684 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 61564.0 + "value": 71211.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099489.0 + "value": 2101554.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.73549124998317 + "value": 45.73463816262577 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -41840,7 +41840,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5357720378925297 + "value": 0.5357322698908262 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -41870,13 +41870,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.90251745678604 + "value": 92.78081628367683 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94671025782395 + "value": 99.94397690581044 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -41936,7 +41936,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.991484890286433 + "value": 28.990678582704803 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -41948,13 +41948,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.743590541624236 + "value": 45.74144613747044 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.31851002219517 + "value": 23.31741687867145 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -41966,7 +41966,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.912038713924275 + "value": 56.909392898460645 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -41985,19 +41985,19 @@ "time" ], "times": { - "compilation_time": 24055.848, - "data": 77380.205, - "framework": 357689.67, - "kernel_overhead": 92963.249, - "profiling_overhead": 64314.252, - "profiling_runs": 123031.964, + "compilation_time": 14700.399, + "data": 59555.341, + "framework": 320751.338, + "kernel_overhead": 90735.66, + "profiling_overhead": 49288.223, + "profiling_runs": 121172.114, "runtimes": [ - 3620.736 + 3755.936 ], - "search_algorithm": 29.801, - "validation": 22.124 + "search_algorithm": 38.673, + "validation": 17.642 }, - "timestamp": "2026-03-05 09:00:14 UTC" + "timestamp": "2026-03-13 09:41:36 UTC" }, { "compilation_data": { @@ -42018,14 +42018,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -42033,49 +42033,49 @@ { "name": "time", "unit": "", - "value": 2226.88 + "value": 2254.336 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.558367973079967 + "value": 21.250275492212754 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7820.0 + "value": 10188.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838924.0 + "value": 1841068.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.800476384481305 + "value": 2.8040939346759925 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 45002.0 + "value": 49335.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100456.0 + "value": 2101558.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.93013035287782 + "value": 37.924088737834445 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -42087,7 +42087,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.4441285923594015 + "value": 0.44413902657542087 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -42117,13 +42117,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.16744930230006 + "value": 94.3137804701467 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89070583528695 + "value": 99.90154791739802 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -42183,7 +42183,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.23908417441648 + "value": 43.23514044683524 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -42195,13 +42195,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 75.88087980309557 + "value": 75.87442715590245 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.488936902552865 + "value": 19.487279630861668 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -42213,7 +42213,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.54425208590936 + "value": 64.5387993633631 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -42232,19 +42232,19 @@ "time" ], "times": { - "compilation_time": 24803.522, - "data": 78563.459, - "framework": 234278.985, - "kernel_overhead": 34198.898, - "profiling_overhead": 64103.015, - "profiling_runs": 57413.613, + "compilation_time": 14639.351, + "data": 56748.026, + "framework": 190240.524, + "kernel_overhead": 31641.076, + "profiling_overhead": 47057.379, + "profiling_runs": 54794.043, "runtimes": [ - 2226.88 + 2254.336 ], - "search_algorithm": 34.441, - "validation": 27.724 + "search_algorithm": 22.247, + "validation": 13.792 }, - "timestamp": "2026-03-05 09:00:14 UTC" + "timestamp": "2026-03-13 09:41:37 UTC" }, { "compilation_data": { @@ -42265,14 +42265,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -42280,49 +42280,49 @@ { "name": "time", "unit": "", - "value": 1976.032 + "value": 2056.864 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 32.36140608144552 + "value": 24.216573085574243 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4828.0 + "value": 7608.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838312.0 + "value": 1839020.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.1681256238279145 + "value": 3.1692640805356103 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 37705.0 + "value": 41330.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100074.0 + "value": 2100668.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.537668578997533 + "value": 21.53412098315904 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -42334,7 +42334,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2521396370978498 + "value": 0.2521737363762462 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -42364,13 +42364,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.82733943125086 + "value": 97.82925658236906 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.87658484987145 + "value": 99.91173637050002 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -42430,7 +42430,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.63132467567954 + "value": 43.62179114355628 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -42442,13 +42442,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 86.17000964283255 + "value": 86.15134231801818 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.234078405584128 + "value": 11.23164472603069 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -42460,7 +42460,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 61.72546089922437 + "value": 61.71213048764038 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -42479,19 +42479,19 @@ "time" ], "times": { - "compilation_time": 24564.823, - "data": 77751.773, - "framework": 229319.14800000002, - "kernel_overhead": 32557.192, - "profiling_overhead": 64597.861, - "profiling_runs": 54412.322, + "compilation_time": 14393.619, + "data": 57445.284, + "framework": 185436.48799999998, + "kernel_overhead": 29609.286, + "profiling_overhead": 47059.049, + "profiling_runs": 51322.869, "runtimes": [ - 1976.032 + 2056.864 ], - "search_algorithm": 36.376, - "validation": 22.448 + "search_algorithm": 24.077, + "validation": 15.346 }, - "timestamp": "2026-03-05 09:00:14 UTC" + "timestamp": "2026-03-13 09:41:37 UTC" }, { "compilation_data": { @@ -42512,14 +42512,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -42527,49 +42527,49 @@ { "name": "time", "unit": "", - "value": 1850.272 + "value": 1860.32 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 34.422002032520325 + "value": 25.678676260661646 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3596.0 + "value": 3648.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839000.0 + "value": 1838380.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3648208058219975 + "value": 3.3680625932498884 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 33710.0 + "value": 34321.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100262.0 + "value": 2100374.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.45206350257764 + "value": 11.450392130443438 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -42581,7 +42581,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.13413207118780235 + "value": 0.1340591086334808 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -42611,13 +42611,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.09655018486 + "value": 98.12224873410426 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91773419898439 + "value": 99.87836335571507 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -42677,7 +42677,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.78974534334782 + "value": 37.78229862113663 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -42689,13 +42689,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 91.6428846840322 + "value": 91.62913943752184 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.1527815644797 + "value": 6.1518587268844005 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -42707,7 +42707,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 66.50772527192586 + "value": 66.49779298347511 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -42726,19 +42726,19 @@ "time" ], "times": { - "compilation_time": 24453.211, - "data": 76754.081, - "framework": 234888.024, - "kernel_overhead": 36524.862, - "profiling_overhead": 63785.327, - "profiling_runs": 57823.754, + "compilation_time": 14962.749, + "data": 56615.263, + "framework": 191744.306, + "kernel_overhead": 33397.105, + "profiling_overhead": 47161.535, + "profiling_runs": 54570.403, "runtimes": [ - 1850.272 + 1860.32 ], - "search_algorithm": 32.345, - "validation": 24.391 + "search_algorithm": 26.654, + "validation": 15.135 }, - "timestamp": "2026-03-05 09:00:14 UTC" + "timestamp": "2026-03-13 09:41:37 UTC" }, { "compilation_data": { @@ -42759,14 +42759,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -42774,49 +42774,49 @@ { "name": "time", "unit": "", - "value": 1790.912 + "value": 1889.088 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.56012603377408 + "value": 26.519925225524993 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5356.0 + "value": 668.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1832892.0 + "value": 1835648.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4877061363819974 + "value": 3.4726713679011216 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 33832.0 + "value": 30048.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100204.0 + "value": 2099333.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.9310913698298 + "value": 5.929496832988967 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -42828,7 +42828,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.0694503004200251 + "value": 0.06943588205438443 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -42858,13 +42858,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.68637721918192 + "value": 81.65697193869443 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91366329535798 + "value": 99.9145387141772 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -42924,7 +42924,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.639812750371014 + "value": 37.627757938144 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -42936,13 +42936,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.90474780527818 + "value": 94.88421356721027 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.3712501966962827 + "value": 3.370520770026634 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -42954,7 +42954,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 58.67380086401127 + "value": 58.6611058005592 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -42973,19 +42973,19 @@ "time" ], "times": { - "compilation_time": 23650.668, - "data": 76770.581, - "framework": 209719.948, - "kernel_overhead": 24091.563, - "profiling_overhead": 64030.772, - "profiling_runs": 44827.032, + "compilation_time": 14827.233, + "data": 57999.745, + "framework": 168008.556, + "kernel_overhead": 20752.562, + "profiling_overhead": 47588.496, + "profiling_runs": 41667.753, "runtimes": [ - 1790.912 + 1889.088 ], - "search_algorithm": 41.216, - "validation": 25.538 + "search_algorithm": 27.513, + "validation": 17.332 }, - "timestamp": "2026-03-05 09:00:15 UTC" + "timestamp": "2026-03-13 09:41:37 UTC" }, { "compilation_data": { @@ -43006,14 +43006,14 @@ "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -43021,49 +43021,49 @@ { "name": "time", "unit": "", - "value": 6261.504 + "value": 6320.832 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.469040018429538 + "value": 7.368079397501987 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 856.0 + "value": 9072.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840592.0 + "value": 1842412.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9973188251144917 + "value": 1.0021556079354108 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 104590.0 + "value": 113546.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099910.0 + "value": 2103907.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.286308249835255 + "value": 26.284241336142838 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -43075,7 +43075,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.30797119837713616 + "value": 0.30795584875992044 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -43105,13 +43105,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.6692458008658 + "value": 97.56803115678323 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9713426916427 + "value": 99.97026853080243 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -43171,7 +43171,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.02597248224945 + "value": 43.024014447322585 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -43183,13 +43183,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.287742287549797 + "value": 26.286714520610623 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.400587377051751 + "value": 13.40006345679565 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -43201,7 +43201,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.91610803289404 + "value": 79.9129962001864 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -43220,19 +43220,19 @@ "time" ], "times": { - "compilation_time": 23945.467, - "data": 76991.868, - "framework": 2201109.058, - "kernel_overhead": 1007613.369, - "profiling_overhead": 63755.712, - "profiling_runs": 1052748.109, + "compilation_time": 15235.46, + "data": 58978.966, + "framework": 2155806.7199999997, + "kernel_overhead": 1001857.754, + "profiling_overhead": 48435.123, + "profiling_runs": 1046534.877, "runtimes": [ - 6261.504 + 6320.832 ], - "search_algorithm": 44.488, - "validation": 33.36 + "search_algorithm": 26.109, + "validation": 15.522 }, - "timestamp": "2026-03-05 09:00:16 UTC" + "timestamp": "2026-03-13 09:41:38 UTC" }, { "compilation_data": { @@ -43253,14 +43253,14 @@ "registers": 25 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -43268,49 +43268,49 @@ { "name": "time", "unit": "", - "value": 6164.608 + "value": 6275.488 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.095334291699471 + "value": 7.739954517107159 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 476.0 + "value": 5400.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837760.0 + "value": 1837368.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0202530959895233 + "value": 1.0240053525963662 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 101974.0 + "value": 106549.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099167.0 + "value": 2100552.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.384535362937921 + "value": 13.38369666480172 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -43322,7 +43322,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1568081026707688 + "value": 0.15681089417143745 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -43352,13 +43352,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.66617749294197 + "value": 98.5448901933493 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96635931109842 + "value": 99.97378997801259 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -43418,7 +43418,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.235008574614 + "value": 36.23303387071551 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -43430,13 +43430,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.770922111768247 + "value": 26.769408873210228 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.875734878315478 + "value": 6.875346224271767 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -43448,7 +43448,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.50264999864991 + "value": 83.49794363551302 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -43467,19 +43467,19 @@ "time" ], "times": { - "compilation_time": 24097.649, - "data": 77982.026, - "framework": 2787446.492, - "kernel_overhead": 1300638.781, - "profiling_overhead": 64865.803, - "profiling_runs": 1343959.882, + "compilation_time": 14429.548, + "data": 57976.449, + "framework": 2737390.726, + "kernel_overhead": 1294037.349, + "profiling_overhead": 47987.103, + "profiling_runs": 1337389.825, "runtimes": [ - 6164.608 + 6275.488 ], - "search_algorithm": 42.463, - "validation": 37.421 + "search_algorithm": 26.528, + "validation": 17.194 }, - "timestamp": "2026-03-05 09:00:17 UTC" + "timestamp": "2026-03-13 09:41:40 UTC" }, { "compilation_data": { @@ -43500,14 +43500,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -43515,49 +43515,49 @@ { "name": "time", "unit": "", - "value": 8107.936 + "value": 8165.056 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.700819651129423 + "value": 5.705653798780534 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10680.0 + "value": 1180.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1912864.0 + "value": 1908076.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.79969844476198 + "value": 48.710247765670566 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 670579.0 + "value": 655836.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138422165.0 + "value": 138415628.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.1698815472034445 + "value": 5.105256002591709 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -43569,7 +43569,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.05988375446286385 + "value": 0.059622818712423684 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -43599,13 +43599,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.81570087253475 + "value": 97.87349357670632 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.83095791212034 + "value": 99.68678156820337 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -43665,7 +43665,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.431993883060178 + "value": 12.26992002256558 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -43677,13 +43677,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.68210402402409 + "value": 20.415199621944623 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.138387370730145 + "value": 12.968835306713844 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -43695,7 +43695,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 32.39187738019218 + "value": 31.973875102014222 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -43714,19 +43714,19 @@ "time" ], "times": { - "compilation_time": 25281.454, - "data": 77183.531, - "framework": 961418.2250000001, - "kernel_overhead": 381804.215, - "profiling_overhead": 64191.091, - "profiling_runs": 438239.388, + "compilation_time": 14318.138, + "data": 59219.636, + "framework": 922731.371, + "kernel_overhead": 379127.322, + "profiling_overhead": 48793.361, + "profiling_runs": 435591.052, "runtimes": [ - 8107.936 + 8165.056 ], - "search_algorithm": 63.456, - "validation": 37.843 + "search_algorithm": 23.88, + "validation": 13.079 }, - "timestamp": "2026-03-05 09:00:18 UTC" + "timestamp": "2026-03-13 09:41:40 UTC" }, { "compilation_data": { @@ -43747,14 +43747,14 @@ "registers": 34 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -43762,49 +43762,49 @@ { "name": "time", "unit": "", - "value": 8116.128 + "value": 8326.368 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.781059053002408 + "value": 6.1440526996001745 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6292.0 + "value": 13272.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1995340.0 + "value": 2000892.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 53.93306690761545 + "value": 53.858081345761455 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 15951348.0 + "value": 15966372.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415389.0 + "value": 138424293.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.555559302367888 + "value": 2.55987865418965 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -43816,7 +43816,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029830086487344667 + "value": 0.02970663752605405 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -43846,13 +43846,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.59221740931332 + "value": 90.66351728643592 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.28389464211543 + "value": 98.68571798099882 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -43912,7 +43912,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.070175708306863 + "value": 10.08954102105952 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -43924,13 +43924,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.51088525696867 + "value": 20.549813724505643 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.732667030536522 + "value": 11.754934950321466 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -43942,7 +43942,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.006529622944804 + "value": 29.061597716836324 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -43961,19 +43961,19 @@ "time" ], "times": { - "compilation_time": 25812.059, - "data": 80030.98, - "framework": 935020.524, - "kernel_overhead": 366410.92, - "profiling_overhead": 66170.934, - "profiling_runs": 422407.69, + "compilation_time": 14537.364, + "data": 59228.145, + "framework": 881912.73, + "kernel_overhead": 359250.378, + "profiling_overhead": 48861.977, + "profiling_runs": 414572.23, "runtimes": [ - 8116.128 + 8326.368 ], - "search_algorithm": 42.311, - "validation": 33.443 + "search_algorithm": 24.222, + "validation": 13.499 }, - "timestamp": "2026-03-05 09:00:18 UTC" + "timestamp": "2026-03-13 09:41:41 UTC" }, { "compilation_data": { @@ -43994,14 +43994,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -44009,49 +44009,49 @@ { "name": "time", "unit": "", - "value": 9520.576 + "value": 9748.032 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.069279270043059 + "value": 5.875093301855567 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6964.0 + "value": 27268.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2199556.0 + "value": 2199304.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 76.44124660472498 + "value": 76.40114307391784 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 119427196.0 + "value": 118383656.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138416300.0 + "value": 138421197.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.1118285546576046 + "value": 1.1016916762538722 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -44063,7 +44063,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.012771289874294049 + "value": 0.012608967315315672 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -44093,13 +44093,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.95876433558671 + "value": 94.40704399367904 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.61868153477063 + "value": 96.4345562299196 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -44159,7 +44159,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.706456951465252 + "value": 7.781312277429116 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -44171,13 +44171,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.681302876563237 + "value": 17.851944414475117 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.555069315740413 + "value": 9.647284902695477 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -44189,7 +44189,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.661349242514078 + "value": 23.88971693390926 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -44208,19 +44208,19 @@ "time" ], "times": { - "compilation_time": 24948.574, - "data": 78371.968, - "framework": 917842.0789999999, - "kernel_overhead": 354783.479, - "profiling_overhead": 65368.56, - "profiling_runs": 419318.072, + "compilation_time": 14464.738, + "data": 59134.2, + "framework": 874380.028, + "kernel_overhead": 351312.655, + "profiling_overhead": 48976.355, + "profiling_runs": 414956.818, "runtimes": [ - 9520.576 + 9748.032 ], - "search_algorithm": 91.964, - "validation": 46.615 + "search_algorithm": 26.525, + "validation": 16.181 }, - "timestamp": "2026-03-05 09:00:19 UTC" + "timestamp": "2026-03-13 09:41:41 UTC" }, { "compilation_data": { @@ -44241,14 +44241,14 @@ "registers": 26 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -44256,49 +44256,49 @@ { "name": "time", "unit": "", - "value": 3949.888 + "value": 4027.232 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.892384105960264 + "value": 12.055901728768042 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5336.0 + "value": 876.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837672.0 + "value": 1837948.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5696086950736285 + "value": 1.569771493699714 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 70478.0 + "value": 67125.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099807.0 + "value": 2100212.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.89672084077298 + "value": 20.895048302223522 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -44310,7 +44310,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.24479353066159498 + "value": 0.24477752421219334 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -44340,13 +44340,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.49357781613146 + "value": 98.37864317763598 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95620307090576 + "value": 99.95758539785933 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -44406,7 +44406,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.61265975761161 + "value": 39.60918691761892 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -44418,13 +44418,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.79640143988144 + "value": 41.79309050527442 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.7348179479383 + "value": 10.733967580944505 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -44436,7 +44436,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.34858064692617 + "value": 83.34199774108289 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -44455,19 +44455,19 @@ "time" ], "times": { - "compilation_time": 26559.017, - "data": 78492.706, - "framework": 1424406.063, - "kernel_overhead": 624230.37, - "profiling_overhead": 65013.275, - "profiling_runs": 656669.712, + "compilation_time": 14474.966, + "data": 57305.104, + "framework": 1371169.661, + "kernel_overhead": 617559.667, + "profiling_overhead": 47135.82, + "profiling_runs": 649169.07, "runtimes": [ - 3949.888 + 4027.232 ], - "search_algorithm": 38.687, - "validation": 31.652 + "search_algorithm": 25.636, + "validation": 15.422 }, - "timestamp": "2026-03-05 09:00:20 UTC" + "timestamp": "2026-03-13 09:41:42 UTC" }, { "compilation_data": { @@ -44488,14 +44488,14 @@ "registers": 30 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -44503,49 +44503,49 @@ { "name": "time", "unit": "", - "value": 4253.408 + "value": 4440.928 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.871547616592068 + "value": 10.686826090508086 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6728.0 + "value": 4596.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839000.0 + "value": 1837464.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.480495000462905 + "value": 1.464399188039428 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 76593.0 + "value": 75962.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100509.0 + "value": 2100235.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.81887559338704 + "value": 9.818107575907034 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -44557,7 +44557,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11502843508906466 + "value": 0.11502705143627252 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -44587,13 +44587,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.82290966280213 + "value": 98.76863159726551 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.960623232402 + "value": 99.96821058983706 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -44653,7 +44653,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.26809498250721 + "value": 43.263768865325616 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -44665,13 +44665,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.27850578300555 + "value": 39.27505219768252 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.120781759796133 + "value": 5.120331512100211 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -44683,7 +44683,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.93755799865345 + "value": 90.92958069648898 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -44702,19 +44702,19 @@ "time" ], "times": { - "compilation_time": 24764.465, - "data": 82478.809, - "framework": 1938023.006, - "kernel_overhead": 876395.265, - "profiling_overhead": 68926.297, - "profiling_runs": 910222.635, + "compilation_time": 14019.708, + "data": 57868.448, + "framework": 1880982.1069999998, + "kernel_overhead": 870756.357, + "profiling_overhead": 47808.682, + "profiling_runs": 904548.62, "runtimes": [ - 4253.408 + 4440.928 ], - "search_algorithm": 44.293, - "validation": 32.638 + "search_algorithm": 27.175, + "validation": 14.517 }, - "timestamp": "2026-03-05 09:00:21 UTC" + "timestamp": "2026-03-13 09:41:43 UTC" }, { "compilation_data": { @@ -44735,14 +44735,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -44750,49 +44750,49 @@ { "name": "time", "unit": "", - "value": 6876.992 + "value": 6943.744 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.261553131948173 + "value": 7.000180964040381 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12252.0 + "value": 6480.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1843212.0 + "value": 1837912.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9398345660512881 + "value": 0.9307740076988268 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 124479.0 + "value": 117824.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109052.0 + "value": 2101122.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.0412234130488933 + "value": 3.041058234763193 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -44804,7 +44804,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.03563169768917074 + "value": 0.03563319075127311 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -44834,13 +44834,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.80536105062056 + "value": 98.82274779127494 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9736940575387 + "value": 99.98330455390612 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -44900,7 +44900,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.47851636128131 + "value": 36.47670100773274 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -44912,13 +44912,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.33097278084056 + "value": 24.32965349705354 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6335491979324106 + "value": 1.6334606229711237 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -44930,7 +44930,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.98032480703539 + "value": 87.9755650880638 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -44949,19 +44949,19 @@ "time" ], "times": { - "compilation_time": 25075.63, - "data": 78790.388, - "framework": 3160905.192, - "kernel_overhead": 1485333.989, - "profiling_overhead": 65323.489, - "profiling_runs": 1531457.326, + "compilation_time": 15138.729, + "data": 59506.132, + "framework": 3110194.699, + "kernel_overhead": 1478032.542, + "profiling_overhead": 48921.035, + "profiling_runs": 1523734.99, "runtimes": [ - 6876.992 + 6943.744 ], - "search_algorithm": 78.878, - "validation": 34.902 + "search_algorithm": 24.905, + "validation": 15.98 }, - "timestamp": "2026-03-05 09:00:22 UTC" + "timestamp": "2026-03-13 09:41:44 UTC" }, { "compilation_data": { @@ -44982,14 +44982,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -44997,49 +44997,49 @@ { "name": "time", "unit": "", - "value": 9444.736 + "value": 9602.752 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 7.214586908299501 + "value": 6.071388025290809 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6424.0 + "value": 21956.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2200016.0 + "value": 2202756.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.74878171257203 + "value": 81.81003479648997 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 132970566.0 + "value": 132987295.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138416562.0 + "value": 138423217.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.101854189782572 + "value": 1.105124030747497 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -45051,7 +45051,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01295048572331203 + "value": 0.01273611696398749 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -45081,13 +45081,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.7285547840844 + "value": 98.73289631300591 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 100.18918381344841 + "value": 97.3080410621822 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -45147,7 +45147,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.691286725023169 + "value": 7.787644490364035 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -45159,13 +45159,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.648342034424214 + "value": 17.870100803953324 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.125171594225489 + "value": 5.189571510327753 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -45177,7 +45177,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 16.999106303836307 + "value": 17.212721335644595 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -45196,19 +45196,19 @@ "time" ], "times": { - "compilation_time": 27960.862, - "data": 78533.738, - "framework": 590339.854, - "kernel_overhead": 191806.95, - "profiling_overhead": 64881.801, - "profiling_runs": 255117.365, + "compilation_time": 14593.176, + "data": 59167.133, + "framework": 549645.456, + "kernel_overhead": 189479.949, + "profiling_overhead": 49145.34, + "profiling_runs": 251853.034, "runtimes": [ - 9444.736 + 9602.752 ], - "search_algorithm": 42.46, - "validation": 30.934 + "search_algorithm": 25.729, + "validation": 18.683 }, - "timestamp": "2026-03-05 09:00:23 UTC" + "timestamp": "2026-03-13 09:41:45 UTC" }, { "compilation_data": { @@ -45229,14 +45229,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -45244,49 +45244,49 @@ { "name": "time", "unit": "", - "value": 2792.544 + "value": 3219.712 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 22.569444444444446 + "value": 16.924268051028303 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 464.0 + "value": 6504.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839816.0 + "value": 1841052.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.2088250096649693 + "value": 2.187158297160336 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 47904.0 + "value": 54562.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099171.0 + "value": 2105839.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.860144694511263 + "value": 14.858587646059554 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -45298,7 +45298,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.1740796061448911 + "value": 0.17405341986711853 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -45328,13 +45328,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.68993908053892 + "value": 98.66815644297495 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.9563749554725 + "value": 99.9526478432454 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -45394,7 +45394,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.947488891749785 + "value": 45.942489498294584 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -45406,13 +45406,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.44510518439561 + "value": 59.43837933586531 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.74992338097345 + "value": 7.749046524744159 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -45424,7 +45424,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.39925566576926 + "value": 88.38928239680611 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -45443,19 +45443,19 @@ "time" ], "times": { - "compilation_time": 24808.625, - "data": 77775.321, - "framework": 1193808.3090000001, - "kernel_overhead": 513088.504, - "profiling_overhead": 64032.088, - "profiling_runs": 538912.396, + "compilation_time": 14330.999, + "data": 57768.981, + "framework": 1147520.1160000002, + "kernel_overhead": 507998.857, + "profiling_overhead": 47398.216, + "profiling_runs": 534354.062, "runtimes": [ - 2792.544 + 3219.712 ], - "search_algorithm": 43.425, - "validation": 25.346 + "search_algorithm": 29.875, + "validation": 15.517 }, - "timestamp": "2026-03-05 09:00:23 UTC" + "timestamp": "2026-03-13 09:41:45 UTC" }, { "compilation_data": { @@ -45476,14 +45476,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -45491,49 +45491,49 @@ { "name": "time", "unit": "", - "value": 3438.496 + "value": 3507.456 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.340705859969557 + "value": 13.838928399228545 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6936.0 + "value": 212.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1843920.0 + "value": 1836712.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8369101984189582 + "value": 1.7953836291584802 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 64363.0 + "value": 57069.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2131844.0 + "value": 2099194.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.037968743338642 + "value": 6.037402553359811 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -45545,7 +45545,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07073497949503008 + "value": 0.07073180769904905 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -45575,13 +45575,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.6244823157165 + "value": 98.63063677973165 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96464312366544 + "value": 99.96802627863947 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -45641,7 +45641,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.22617457955385 + "value": 42.22264037772692 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -45653,13 +45653,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.30549198166996 + "value": 48.30169123738616 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.2431665759177823 + "value": 3.2429113989944316 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -45671,7 +45671,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.41329593708035 + "value": 92.40604812044778 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -45690,19 +45690,19 @@ "time" ], "times": { - "compilation_time": 24721.758, - "data": 78262.588, - "framework": 1878639.315, - "kernel_overhead": 853164.604, - "profiling_overhead": 64697.951, - "profiling_runs": 882514.172, + "compilation_time": 14736.112, + "data": 57239.539, + "framework": 1830184.501, + "kernel_overhead": 848352.441, + "profiling_overhead": 47135.662, + "profiling_runs": 877456.859, "runtimes": [ - 3438.496 + 3507.456 ], - "search_algorithm": 35.283, - "validation": 23.679 + "search_algorithm": 26.572, + "validation": 16.527 }, - "timestamp": "2026-03-05 09:00:24 UTC" + "timestamp": "2026-03-13 09:41:46 UTC" }, { "compilation_data": { @@ -45723,14 +45723,14 @@ "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -45738,49 +45738,49 @@ { "name": "time", "unit": "", - "value": 5624.192 + "value": 6449.856 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.307695630999325 + "value": 8.55849054369323 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3148.0 + "value": 10004.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1839460.0 + "value": 1840076.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.137946424476207 + "value": 1.1421725460446115 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 98384.0 + "value": 102630.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104246.0 + "value": 2110951.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.8629396254946387 + "value": 1.8628182807288678 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -45792,7 +45792,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.021823630750987578 + "value": 0.021825045546994465 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -45822,13 +45822,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 73.87128442121306 + "value": 73.8672618369765 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96404320180227 + "value": 99.97344823713026 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -45888,7 +45888,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.344028981832444 + "value": 42.34274455283249 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -45900,13 +45900,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.807248250783537 + "value": 29.806376305186394 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0588268116428234 + "value": 1.0587958379894093 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -45918,7 +45918,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.44658551797258 + "value": 96.44376418488963 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -45937,19 +45937,19 @@ "time" ], "times": { - "compilation_time": 24859.089, - "data": 78048.58, - "framework": 3355439.174, - "kernel_overhead": 1586094.999, - "profiling_overhead": 65231.731, - "profiling_runs": 1626063.864, + "compilation_time": 15065.145, + "data": 57998.98, + "framework": 3294262.958, + "kernel_overhead": 1574050.303, + "profiling_overhead": 47804.926, + "profiling_runs": 1614408.749, "runtimes": [ - 5624.192 + 6449.856 ], - "search_algorithm": 60.754, - "validation": 30.371 + "search_algorithm": 25.279, + "validation": 15.74 }, - "timestamp": "2026-03-05 09:00:26 UTC" + "timestamp": "2026-03-13 09:41:48 UTC" }, { "compilation_data": { @@ -45970,14 +45970,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -45985,49 +45985,49 @@ { "name": "time", "unit": "", - "value": 3316.768 + "value": 3364.928 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.95185564093792 + "value": 14.354911920052817 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6832.0 + "value": 208.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836756.0 + "value": 1836596.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8673008943705545 + "value": 1.8597127997880867 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 61747.0 + "value": 55042.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102890.0 + "value": 2099177.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.227424115098213 + "value": 6.226595269392439 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -46039,7 +46039,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07295563474519229 + "value": 0.07294408018245042 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -46069,13 +46069,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.68698568472067 + "value": 98.68898173548146 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96448362900611 + "value": 99.95521727369007 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -46135,7 +46135,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.77310402813395 + "value": 42.769914651694236 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -46147,13 +46147,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 49.82207497903768 + "value": 49.81880228909951 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.344987944149258 + "value": 3.3447682200933504 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -46165,7 +46165,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 89.86538420599011 + "value": 89.8595043679454 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -46184,19 +46184,19 @@ "time" ], "times": { - "compilation_time": 24203.763, - "data": 77936.848, - "framework": 1748959.326, - "kernel_overhead": 788849.022, - "profiling_overhead": 64563.894, - "profiling_runs": 817609.562, + "compilation_time": 14205.339, + "data": 59531.672, + "framework": 1710707.102, + "kernel_overhead": 786875.407, + "profiling_overhead": 49288.46, + "profiling_runs": 815011.563, "runtimes": [ - 3316.768 + 3364.928 ], - "search_algorithm": 43.562, - "validation": 25.205 + "search_algorithm": 24.27, + "validation": 13.345 }, - "timestamp": "2026-03-05 09:00:27 UTC" + "timestamp": "2026-03-13 09:41:49 UTC" }, { "compilation_data": { @@ -46217,14 +46217,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -46232,49 +46232,49 @@ { "name": "time", "unit": "", - "value": 6288.928 + "value": 6389.312 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.091708547091953 + "value": 7.66230961218482 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12596.0 + "value": 932.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840584.0 + "value": 1838140.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0213701488695492 + "value": 1.0171856882710093 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 115520.0 + "value": 105190.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109655.0 + "value": 2103599.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6606604593534566 + "value": 1.6605635350152728 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -46286,7 +46286,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.01945609325954036 + "value": 0.019455713741598483 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -46316,13 +46316,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.99438654004841 + "value": 81.99569970983828 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96857820355152 + "value": 99.97164588593354 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -46382,7 +46382,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.24515528422781 + "value": 43.242688095550214 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -46394,13 +46394,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.57240218981999 + "value": 26.571068486263066 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9439171188034201 + "value": 0.9438697423709171 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -46412,7 +46412,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.2768883338397 + "value": 94.2721564537331 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -46431,19 +46431,19 @@ "time" ], "times": { - "compilation_time": 24433.959, - "data": 79260.738, - "framework": 3719222.7299999995, - "kernel_overhead": 1765346.545, - "profiling_overhead": 66146.353, - "profiling_runs": 1808469.094, + "compilation_time": 14843.849, + "data": 57960.061, + "framework": 3659701.4299999997, + "kernel_overhead": 1755522.939, + "profiling_overhead": 47905.839, + "profiling_runs": 1798312.591, "runtimes": [ - 6288.928 + 6389.312 ], - "search_algorithm": 34.645, - "validation": 28.769 + "search_algorithm": 26.867, + "validation": 15.17 }, - "timestamp": "2026-03-05 09:00:29 UTC" + "timestamp": "2026-03-13 09:41:51 UTC" }, { "compilation_data": { @@ -46464,14 +46464,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "4", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 16, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 4, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -46479,49 +46479,49 @@ { "name": "time", "unit": "", - "value": 5936.288 + "value": 6066.144 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.611630281999474 + "value": 8.078333073531377 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16748.0 + "value": 17556.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838620.0 + "value": 1839804.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0719808512402962 + "value": 1.0685345809264712 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 114278.0 + "value": 116344.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106241.0 + "value": 2106656.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7422212148292622 + "value": 1.7421343847696686 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -46533,7 +46533,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02041312998834651 + "value": 0.020411216242384785 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -46563,13 +46563,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.98400134401336 + "value": 81.9838790862558 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97694531497237 + "value": 99.97219629985578 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -46629,7 +46629,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.06170121123424 + "value": 44.059706995924756 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -46641,13 +46641,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.877153800759196 + "value": 27.8758644313618 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9902651069361483 + "value": 0.9902193053620951 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -46659,7 +46659,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.98582051363933 + "value": 94.98142745324343 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -46678,19 +46678,19 @@ "time" ], "times": { - "compilation_time": 24722.366, - "data": 77285.507, - "framework": 3615287.965, - "kernel_overhead": 1715812.871, - "profiling_overhead": 64546.034, - "profiling_runs": 1757643.553, + "compilation_time": 14149.003, + "data": 58181.702, + "framework": 3570319.691, + "kernel_overhead": 1711376.026, + "profiling_overhead": 47982.46, + "profiling_runs": 1752779.503, "runtimes": [ - 5936.288 + 6066.144 ], - "search_algorithm": 44.18, - "validation": 31.376 + "search_algorithm": 26.012, + "validation": 15.612 }, - "timestamp": "2026-03-05 09:00:31 UTC" + "timestamp": "2026-03-13 09:41:53 UTC" }, { "compilation_data": { @@ -46711,14 +46711,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "1" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 1 }, "correctness": 1, "invalidity": "correct", @@ -46726,49 +46726,49 @@ { "name": "time", "unit": "", - "value": 6865.76 + "value": 6957.568 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.862184314713947 + "value": 6.798203563933408 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 2868.0 + "value": 9156.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842304.0 + "value": 1840112.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9229860526596289 + "value": 0.9238513861004161 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 119400.0 + "value": 120875.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2105180.0 + "value": 2105704.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.27052748999128 + "value": 48.267919611155804 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -46780,7 +46780,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5655699457428771 + "value": 0.565533146761012 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -46810,13 +46810,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.11833134569186 + "value": 93.0287514508357 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.97621901757134 + "value": 99.9699135879803 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -46876,7 +46876,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.49688886078385 + "value": 21.49664130092607 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -46888,13 +46888,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.13672430186117 + "value": 24.136676121628987 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.51386061907775 + "value": 24.51381168602944 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -46906,7 +46906,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 47.47248207250062 + "value": 47.47238731087597 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -46925,19 +46925,19 @@ "time" ], "times": { - "compilation_time": 24665.438, - "data": 77947.126, - "framework": 542210.712, - "kernel_overhead": 175954.631, - "profiling_overhead": 64666.104, - "profiling_runs": 223642.851, + "compilation_time": 13997.903, + "data": 58256.998, + "framework": 501594.5850000001, + "kernel_overhead": 173866.888, + "profiling_overhead": 48243.893, + "profiling_runs": 221226.806, "runtimes": [ - 6865.76 + 6957.568 ], - "search_algorithm": 37.882, - "validation": 25.618 + "search_algorithm": 25.734, + "validation": 14.608 }, - "timestamp": "2026-03-05 09:00:31 UTC" + "timestamp": "2026-03-13 09:41:53 UTC" }, { "compilation_data": { @@ -46958,14 +46958,14 @@ "registers": 39 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -46973,49 +46973,49 @@ { "name": "time", "unit": "", - "value": 3631.2 + "value": 3682.016 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 17.310942152530977 + "value": 13.13378445464648 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6612.0 + "value": 5336.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1838884.0 + "value": 1840236.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.715406862425362 + "value": 1.7236755193112634 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 65308.0 + "value": 68993.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2101480.0 + "value": 2108100.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.72948291338111 + "value": 45.73022081998979 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -47027,7 +47027,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.5356574799970314 + "value": 0.5356231863858517 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -47057,13 +47057,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.87215583888523 + "value": 93.63457191257216 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93992318613759 + "value": 99.94147683011808 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -47123,7 +47123,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 28.98759654304713 + "value": 28.985227077057633 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -47135,13 +47135,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.73691557471624 + "value": 45.733276468005954 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 23.315107353517458 + "value": 23.31325226201085 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -47153,7 +47153,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 56.90379400679579 + "value": 56.89926639543397 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -47172,19 +47172,19 @@ "time" ], "times": { - "compilation_time": 24520.518, - "data": 77963.475, - "framework": 359034.719, - "kernel_overhead": 92904.447, - "profiling_overhead": 64644.011, - "profiling_runs": 123522.786, + "compilation_time": 14604.395, + "data": 57284.626, + "framework": 316094.765, + "kernel_overhead": 90851.945, + "profiling_overhead": 47107.972, + "profiling_runs": 120850.222, "runtimes": [ - 3631.2 + 3682.016 ], - "search_algorithm": 36.809, - "validation": 27.443 + "search_algorithm": 32.486, + "validation": 15.322 }, - "timestamp": "2026-03-05 09:00:32 UTC" + "timestamp": "2026-03-13 09:41:53 UTC" }, { "compilation_data": { @@ -47205,14 +47205,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -47220,49 +47220,49 @@ { "name": "time", "unit": "", - "value": 2231.424 + "value": 2280.896 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 28.590428169125815 + "value": 21.660699479912566 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5060.0 + "value": 1816.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840244.0 + "value": 1837368.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.800416474024113 + "value": 2.7814731422150176 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 41211.0 + "value": 37855.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100272.0 + "value": 2099679.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.99212538023481 + "value": 37.987090510374344 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -47274,7 +47274,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.4449778529547002 + "value": 0.4449089774995648 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -47304,13 +47304,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.46414085106828 + "value": 95.52295814422723 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.91758031659226 + "value": 99.91324825960773 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -47370,7 +47370,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.31002983312871 + "value": 43.3051003511832 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -47382,13 +47382,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.00553042181487 + "value": 75.99706093293204 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.520951661071592 + "value": 19.518776391954223 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -47400,7 +47400,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 64.65031582143423 + "value": 64.6431116728929 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -47419,19 +47419,19 @@ "time" ], "times": { - "compilation_time": 24528.642, - "data": 77323.318, - "framework": 231816.72, - "kernel_overhead": 33615.581, - "profiling_overhead": 63989.701, - "profiling_runs": 56888.12, + "compilation_time": 14354.838, + "data": 57959.172, + "framework": 203667.863, + "kernel_overhead": 37275.716, + "profiling_overhead": 48095.971, + "profiling_runs": 60337.004, "runtimes": [ - 2231.424 + 2280.896 ], - "search_algorithm": 36.559, - "validation": 23.36 + "search_algorithm": 38.689, + "validation": 17.502 }, - "timestamp": "2026-03-05 09:00:32 UTC" + "timestamp": "2026-03-13 09:41:53 UTC" }, { "compilation_data": { @@ -47452,14 +47452,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -47467,49 +47467,49 @@ { "name": "time", "unit": "", - "value": 1936.672 + "value": 1967.104 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 32.640868168657256 + "value": 25.006088650754993 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6948.0 + "value": 2332.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840076.0 + "value": 1837604.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.2223475633067986 + "value": 3.2050074045514734 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 39289.0 + "value": 33843.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104429.0 + "value": 2099273.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 21.79161473305648 + "value": 21.78864540604333 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -47521,7 +47521,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.2552221573043323 + "value": 0.2551985966569358 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -47551,13 +47551,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.73632578735163 + "value": 97.00789740239885 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89381028891323 + "value": 99.9007219204442 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -47617,7 +47617,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.157755051299716 + "value": 44.150110665681744 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -47629,13 +47629,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.20843607952823 + "value": 87.19435253758117 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.369459195915057 + "value": 11.367623109147544 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -47647,7 +47647,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 62.46935035668396 + "value": 62.45926200106487 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -47666,19 +47666,19 @@ "time" ], "times": { - "compilation_time": 24698.542, - "data": 76695.425, - "framework": 227253.72, - "kernel_overhead": 32495.878, - "profiling_overhead": 63719.927, - "profiling_runs": 54342.49, + "compilation_time": 14882.971, + "data": 56807.149, + "framework": 185344.615, + "kernel_overhead": 29928.914, + "profiling_overhead": 47267.7, + "profiling_runs": 51340.852, "runtimes": [ - 1936.672 + 1967.104 ], - "search_algorithm": 43.634, - "validation": 24.552 + "search_algorithm": 27.15, + "validation": 13.97 }, - "timestamp": "2026-03-05 09:00:32 UTC" + "timestamp": "2026-03-13 09:41:54 UTC" }, { "compilation_data": { @@ -47699,14 +47699,14 @@ "registers": 40 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -47714,49 +47714,49 @@ { "name": "time", "unit": "", - "value": 1827.712 + "value": 1943.488 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.021866332538565 + "value": 26.74439829972468 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4848.0 + "value": 1560.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837672.0 + "value": 1838660.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.4246966512993438 + "value": 3.4327579852753622 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 34480.0 + "value": 32040.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100020.0 + "value": 2103086.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.654054812821633 + "value": 11.652994098161274 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -47768,7 +47768,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.13647255755436913 + "value": 0.13645152306461591 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -47798,13 +47798,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.08988486958107 + "value": 98.10847554544715 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.89894430902328 + "value": 99.91000326359884 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -47864,7 +47864,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 38.45647812040454 + "value": 38.44572064024574 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -47876,13 +47876,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 93.25950999934733 + "value": 93.23481470253309 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.261319641069463 + "value": 6.259661631639794 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -47894,7 +47894,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 67.68100480283489 + "value": 67.66308274317251 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -47913,19 +47913,19 @@ "time" ], "times": { - "compilation_time": 24577.089, - "data": 77136.855, - "framework": 234084.409, - "kernel_overhead": 35825.818, - "profiling_overhead": 64064.571, - "profiling_runs": 57057.165, + "compilation_time": 15315.73, + "data": 57706.358, + "framework": 194294.569, + "kernel_overhead": 34080.039, + "profiling_overhead": 47442.109, + "profiling_runs": 55066.063, "runtimes": [ - 1827.712 + 1943.488 ], - "search_algorithm": 44.633, - "validation": 24.566 + "search_algorithm": 26.067, + "validation": 13.637 }, - "timestamp": "2026-03-05 09:00:32 UTC" + "timestamp": "2026-03-13 09:41:54 UTC" }, { "compilation_data": { @@ -47946,14 +47946,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "0", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 0, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -47961,49 +47961,49 @@ { "name": "time", "unit": "", - "value": 1799.904 + "value": 1858.496 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 35.56145848786036 + "value": 26.82262485059017 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6132.0 + "value": 932.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1834916.0 + "value": 1837392.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.5184998841520843 + "value": 3.506110936473611 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 35259.0 + "value": 29256.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100200.0 + "value": 2099466.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.961831917408487 + "value": 5.961696163552753 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -48015,7 +48015,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.06977761564981086 + "value": 0.06975668495887376 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -48045,13 +48045,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.25260497346576 + "value": 81.30424595723531 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.82755053993611 + "value": 99.82342555279108 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -48111,7 +48111,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 37.85417208805649 + "value": 37.842544517341395 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -48123,13 +48123,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.43428046859238 + "value": 95.4095961641935 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.390060500043992 + "value": 3.389183652805213 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -48141,7 +48141,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.00117852168162 + "value": 58.98591772604958 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -48160,19 +48160,19 @@ "time" ], "times": { - "compilation_time": 24334.354, - "data": 76929.907, - "framework": 209143.915, - "kernel_overhead": 23648.318, - "profiling_overhead": 63803.298, - "profiling_runs": 44762.392, + "compilation_time": 14806.173, + "data": 57020.7, + "framework": 167731.686, + "kernel_overhead": 21357.409, + "profiling_overhead": 47219.056, + "profiling_runs": 42134.521, "runtimes": [ - 1799.904 + 1858.496 ], - "search_algorithm": 36.057, - "validation": 22.071 + "search_algorithm": 22.544, + "validation": 14.879 }, - "timestamp": "2026-03-05 09:00:32 UTC" + "timestamp": "2026-03-13 09:41:54 UTC" }, { "compilation_data": { @@ -48193,14 +48193,14 @@ "registers": 22 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "2" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 2 }, "correctness": 1, "invalidity": "correct", @@ -48208,49 +48208,49 @@ { "name": "time", "unit": "", - "value": 6304.704 + "value": 6408.896 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.536935496657243 + "value": 7.609012622964633 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6852.0 + "value": 10012.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840988.0 + "value": 1839448.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9961468944835492 + "value": 1.0078509339919588 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 109059.0 + "value": 112793.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2100242.0 + "value": 2104546.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.223188148362546 + "value": 26.221748235228105 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -48262,7 +48262,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.3072354953008165 + "value": 0.30720498248080963 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -48292,13 +48292,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.97716782907379 + "value": 97.28208326817835 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96873945483638 + "value": 99.96462344967408 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -48358,7 +48358,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.923889023291586 + "value": 42.92140548617156 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -48370,13 +48370,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.225627206373623 + "value": 26.2241023544627 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.368923243874056 + "value": 13.368145926786651 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -48388,7 +48388,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 79.72730278941437 + "value": 79.72266715843195 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -48407,19 +48407,19 @@ "time" ], "times": { - "compilation_time": 24348.374, - "data": 78186.905, - "framework": 2195452.199, - "kernel_overhead": 1003753.289, - "profiling_overhead": 65048.167, - "profiling_runs": 1048463.838, + "compilation_time": 14990.5, + "data": 58655.271, + "framework": 2150695.997, + "kernel_overhead": 998982.264, + "profiling_overhead": 48983.972, + "profiling_runs": 1044074.49, "runtimes": [ - 6304.704 + 6408.896 ], - "search_algorithm": 42.057, - "validation": 28.959 + "search_algorithm": 27.353, + "validation": 19.129 }, - "timestamp": "2026-03-05 09:00:33 UTC" + "timestamp": "2026-03-13 09:41:55 UTC" }, { "compilation_data": { @@ -48440,14 +48440,14 @@ "registers": 25 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -48455,49 +48455,49 @@ { "name": "time", "unit": "", - "value": 6144.608 + "value": 6281.056 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.087598722579013 + "value": 7.773107973778948 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 104.0 + "value": 12280.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837524.0 + "value": 1839436.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.018433280485753 + "value": 1.026547350431687 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 99190.0 + "value": 112261.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2098982.0 + "value": 2101595.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.378509483061752 + "value": 13.377647005925072 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -48509,7 +48509,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.15673982190871424 + "value": 0.15672075092809026 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -48539,13 +48539,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.1517333931006 + "value": 98.14531652982855 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96910015295781 + "value": 99.96198889272925 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -48605,7 +48605,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.21758260803667 + "value": 36.21613337334592 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -48617,13 +48617,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.75853128432482 + "value": 26.75717885835255 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.8725524685326445 + "value": 6.8722051169401555 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -48635,7 +48635,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.46404006601857 + "value": 83.45982163809602 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -48654,19 +48654,19 @@ "time" ], "times": { - "compilation_time": 24821.827, - "data": 77857.568, - "framework": 2773804.949, - "kernel_overhead": 1294252.32, - "profiling_overhead": 64444.816, - "profiling_runs": 1337250.245, + "compilation_time": 14650.555, + "data": 58076.484, + "framework": 2724508.1679999996, + "kernel_overhead": 1287551.892, + "profiling_overhead": 47993.258, + "profiling_runs": 1330886.534, "runtimes": [ - 6144.608 + 6281.056 ], - "search_algorithm": 45.613, - "validation": 29.089 + "search_algorithm": 26.515, + "validation": 15.489 }, - "timestamp": "2026-03-05 09:00:35 UTC" + "timestamp": "2026-03-13 09:41:56 UTC" }, { "compilation_data": { @@ -48687,14 +48687,14 @@ "registers": 27 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -48702,49 +48702,49 @@ { "name": "time", "unit": "", - "value": 8064.704 + "value": 8149.536 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.090677234665323 + "value": 5.761415962054825 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 19412.0 + "value": 5744.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1912388.0 + "value": 1911316.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 48.85853826682277 + "value": 48.82877785060414 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 628090.0 + "value": 610370.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138421055.0 + "value": 138420321.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.107106411004788 + "value": 5.187229761894996 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -48756,7 +48756,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.059446845362297024 + "value": 0.05995827951844348 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -48786,13 +48786,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.65110886677385 + "value": 96.42944312491501 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.16438543993496 + "value": 98.73024022029983 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -48852,7 +48852,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.424864317518265 + "value": 12.45976285165539 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -48864,13 +48864,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.67062284628662 + "value": 20.72896750103733 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.13109390772407 + "value": 13.168157577563267 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -48882,7 +48882,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 32.373903885939896 + "value": 32.465281928429185 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -48901,19 +48901,19 @@ "time" ], "times": { - "compilation_time": 24354.097, - "data": 78575.183, - "framework": 957781.711, - "kernel_overhead": 379431.39, - "profiling_overhead": 65399.014, - "profiling_runs": 434376.124, + "compilation_time": 14489.998, + "data": 58602.065, + "framework": 911871.318, + "kernel_overhead": 374904.639, + "profiling_overhead": 48598.67, + "profiling_runs": 429765.944, "runtimes": [ - 8064.704 + 8149.536 ], - "search_algorithm": 56.609, - "validation": 34.37 + "search_algorithm": 26.47, + "validation": 15.813 }, - "timestamp": "2026-03-05 09:00:35 UTC" + "timestamp": "2026-03-13 09:41:57 UTC" }, { "compilation_data": { @@ -48934,14 +48934,14 @@ "registers": 34 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -48949,49 +48949,49 @@ { "name": "time", "unit": "", - "value": 8090.496 + "value": 8238.911 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.135632238833727 + "value": 6.361084892454626 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 12864.0 + "value": 30340.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2002940.0 + "value": 2005484.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 51.74972333189757 + "value": 51.713917583884076 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 8323941.0 + "value": 8368787.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138424331.0 + "value": 138421550.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 2.5947053079077986 + "value": 2.562987756360134 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -49003,7 +49003,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.029970745596199177 + "value": 0.029862256695696095 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -49033,13 +49033,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 85.36379920850604 + "value": 85.37498272636932 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 98.56486595079154 + "value": 98.10492132019559 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -49099,7 +49099,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.191693973159959 + "value": 10.20188952293199 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -49111,13 +49111,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.757933160368353 + "value": 20.77976003982443 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 11.87398373895094 + "value": 11.886469182936679 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -49129,7 +49129,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.355912389466752 + "value": 29.386780007938107 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -49148,19 +49148,19 @@ "time" ], "times": { - "compilation_time": 24215.945, - "data": 78247.447, - "framework": 922000.013, - "kernel_overhead": 361865.57, - "profiling_overhead": 64844.229, - "profiling_runs": 417042.767, + "compilation_time": 14410.33, + "data": 58612.282, + "framework": 888043.285, + "kernel_overhead": 363576.998, + "profiling_overhead": 48412.581, + "profiling_runs": 417441.424, "runtimes": [ - 8090.496 + 8238.911 ], - "search_algorithm": 42.498, - "validation": 26.252 + "search_algorithm": 25.961, + "validation": 16.454 }, - "timestamp": "2026-03-05 09:00:36 UTC" + "timestamp": "2026-03-13 09:41:57 UTC" }, { "compilation_data": { @@ -49181,14 +49181,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "1", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 1, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -49196,49 +49196,49 @@ { "name": "time", "unit": "", - "value": 8625.12 + "value": 8762.336 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.063457950698561 + "value": 6.159756287228185 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6728.0 + "value": 6560.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2200652.0 + "value": 2198684.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 61.356680346013235 + "value": 61.65353095553267 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 46739764.0 + "value": 47820018.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138415274.0 + "value": 138416010.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.2064805030356447 + "value": 1.2157275934303793 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -49250,7 +49250,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.014021967429461848 + "value": 0.014197771490611412 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -49280,13 +49280,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 76.05859515468921 + "value": 75.6739226901639 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.25660372418342 + "value": 99.67702164053263 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -49346,7 +49346,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.4053607003467 + "value": 8.476945889420886 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -49358,13 +49358,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 19.28804614709385 + "value": 19.447501897768245 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.423361852195372 + "value": 10.50953258073975 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -49376,7 +49376,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.811520214807686 + "value": 26.024906023847738 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -49395,19 +49395,19 @@ "time" ], "times": { - "compilation_time": 24041.102, - "data": 78706.309, - "framework": 908163.1880000001, - "kernel_overhead": 353274.014, - "profiling_overhead": 65015.233, - "profiling_runs": 411167.632, + "compilation_time": 14417.675, + "data": 59053.122, + "framework": 865367.642, + "kernel_overhead": 350157.489, + "profiling_overhead": 48769.374, + "profiling_runs": 407387.657, "runtimes": [ - 8625.12 + 8762.336 ], - "search_algorithm": 37.178, - "validation": 38.88 + "search_algorithm": 24.558, + "validation": 14.715 }, - "timestamp": "2026-03-05 09:00:37 UTC" + "timestamp": "2026-03-13 09:41:58 UTC" }, { "compilation_data": { @@ -49428,14 +49428,14 @@ "registers": 26 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "4" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 4 }, "correctness": 1, "invalidity": "correct", @@ -49443,49 +49443,49 @@ { "name": "time", "unit": "", - "value": 3950.784 + "value": 4169.6 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 15.90295084136471 + "value": 12.058365864691888 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 3592.0 + "value": 2828.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1842392.0 + "value": 1839644.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.5775104373684279 + "value": 1.5739694087409546 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 71749.0 + "value": 72368.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104966.0 + "value": 2104321.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 20.926013506303686 + "value": 20.925691949916235 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -49497,7 +49497,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.24516418972791373 + "value": 0.24513472260631197 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -49527,13 +49527,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.1708214591034 + "value": 98.15840342597365 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96507347393995 + "value": 99.95796721512602 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -49593,7 +49593,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.66936716748703 + "value": 39.66659858455398 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -49605,13 +49605,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 41.85597388452374 + "value": 41.85391835894239 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 10.750118292607173 + "value": 10.74959035976743 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -49623,7 +49623,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 83.46743014893643 + "value": 83.46333110590963 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -49642,19 +49642,19 @@ "time" ], "times": { - "compilation_time": 24116.146, - "data": 79245.053, - "framework": 1415123.358, - "kernel_overhead": 618644.604, - "profiling_overhead": 66439.329, - "profiling_runs": 650794.372, + "compilation_time": 14322.111, + "data": 57321.319, + "framework": 1369255.454, + "kernel_overhead": 616324.009, + "profiling_overhead": 47086.806, + "profiling_runs": 648523.32, "runtimes": [ - 3950.784 + 4169.6 ], - "search_algorithm": 41.877, - "validation": 28.555 + "search_algorithm": 26.303, + "validation": 14.986 }, - "timestamp": "2026-03-05 09:00:37 UTC" + "timestamp": "2026-03-13 09:41:59 UTC" }, { "compilation_data": { @@ -49675,14 +49675,14 @@ "registers": 30 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -49690,49 +49690,49 @@ { "name": "time", "unit": "", - "value": 4236.48 + "value": 4308.064 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 14.908583555638275 + "value": 11.291026194023997 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 5364.0 + "value": 5532.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840228.0 + "value": 1837308.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.4869796502321442 + "value": 1.4728514575497045 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 78356.0 + "value": 74173.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2108049.0 + "value": 2100786.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 9.821006571665619 + "value": 9.820071068959301 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -49744,7 +49744,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.11505348118941593 + "value": 0.11504682414168847 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -49774,13 +49774,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.7831965909778 + "value": 98.7808463662608 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96000825308357 + "value": 99.96311717300892 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -49840,7 +49840,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.277425103423695 + "value": 43.27406630156071 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -49852,13 +49852,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 39.287299923543024 + "value": 39.28380495151211 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.1219282615165955 + "value": 5.1214726181903005 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -49870,7 +49870,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.95793194170592 + "value": 90.94984038466983 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -49889,19 +49889,19 @@ "time" ], "times": { - "compilation_time": 24685.986, - "data": 78041.327, - "framework": 1918435.955, - "kernel_overhead": 871168.803, - "profiling_overhead": 64847.741, - "profiling_runs": 904378.084, + "compilation_time": 14004.383, + "data": 57817.952, + "framework": 1874202.534, + "kernel_overhead": 867689.005, + "profiling_overhead": 47731.542, + "profiling_runs": 900964.035, "runtimes": [ - 4236.48 + 4308.064 ], - "search_algorithm": 44.42, - "validation": 28.369 + "search_algorithm": 26.904, + "validation": 15.222 }, - "timestamp": "2026-03-05 09:00:38 UTC" + "timestamp": "2026-03-13 09:42:0 UTC" }, { "compilation_data": { @@ -49922,14 +49922,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -49937,49 +49937,49 @@ { "name": "time", "unit": "", - "value": 6838.592 + "value": 6947.456 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 9.285256901595575 + "value": 6.992910675973478 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 19008.0 + "value": 5684.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841704.0 + "value": 1838332.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.9416504668621262 + "value": 0.9289182236994363 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 130057.0 + "value": 115747.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2106727.0 + "value": 2100857.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.039212143170935 + "value": 3.0389093450763913 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -49991,7 +49991,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.035603269706009945 + "value": 0.03560326351659298 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -50021,13 +50021,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.56857093128562 + "value": 98.55755298323754 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95587674007793 + "value": 99.96194263783444 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -50087,7 +50087,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 36.45602488721565 + "value": 36.453718280763894 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -50099,13 +50099,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.31589441793252 + "value": 24.31441465217009 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6325368566727154 + "value": 1.63243750716474 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -50117,7 +50117,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 87.92580889586094 + "value": 87.92045809118143 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -50136,19 +50136,19 @@ "time" ], "times": { - "compilation_time": 24861.159, - "data": 79806.908, - "framework": 3152635.467, - "kernel_overhead": 1480914.161, - "profiling_overhead": 64942.192, - "profiling_runs": 1526972.206, + "compilation_time": 14688.476, + "data": 58110.86, + "framework": 3105753.9129999997, + "kernel_overhead": 1477048.88, + "profiling_overhead": 47922.855, + "profiling_runs": 1522671.318, "runtimes": [ - 6838.592 + 6947.456 ], - "search_algorithm": 45.49, - "validation": 29.497 + "search_algorithm": 26.463, + "validation": 15.887 }, - "timestamp": "2026-03-05 09:00:40 UTC" + "timestamp": "2026-03-13 09:42:1 UTC" }, { "compilation_data": { @@ -50169,14 +50169,14 @@ "registers": 32 }, "configuration": { - "INNER_UNROLL_FACTOR": "2", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 2, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -50184,49 +50184,49 @@ { "name": "time", "unit": "", - "value": 9165.92 + "value": 9408.8 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 8.145448480634432 + "value": 6.034830869377944 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 22636.0 + "value": 29292.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 2196596.0 + "value": 2194492.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 79.04288350906616 + "value": 79.05656990975108 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 118168995.0 + "value": 118147061.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 138425334.0 + "value": 138423319.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.118574600383318 + "value": 1.1260423660854393 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -50238,7 +50238,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.013013605868966799 + "value": 0.013125143716478438 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -50268,13 +50268,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.04285060999976 + "value": 94.08860375161854 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 97.27988108658779 + "value": 98.42772857457008 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -50334,7 +50334,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.959519426128018 + "value": 7.934046371401735 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -50346,13 +50346,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 18.264732318029502 + "value": 18.206451048316122 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 5.3041745830801 + "value": 5.287249395012702 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -50364,7 +50364,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 17.592829727686745 + "value": 17.53669244428573 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -50383,19 +50383,19 @@ "time" ], "times": { - "compilation_time": 24135.997, - "data": 77924.161, - "framework": 586277.313, - "kernel_overhead": 191176.399, - "profiling_overhead": 64912.126, - "profiling_runs": 252264.627, + "compilation_time": 14140.094, + "data": 59115.35, + "framework": 546988.5319999999, + "kernel_overhead": 189143.778, + "profiling_overhead": 49029.821, + "profiling_runs": 249699.583, "runtimes": [ - 9165.92 + 9408.8 ], - "search_algorithm": 48.631, - "validation": 27.454 + "search_algorithm": 26.147, + "validation": 15.918 }, - "timestamp": "2026-03-05 09:00:40 UTC" + "timestamp": "2026-03-13 09:42:2 UTC" }, { "compilation_data": { @@ -50416,14 +50416,14 @@ "registers": 31 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "8" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 8 }, "correctness": 1, "invalidity": "correct", @@ -50431,49 +50431,49 @@ { "name": "time", "unit": "", - "value": 2791.648 + "value": 2921.344 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 22.490874843847593 + "value": 16.89516365556077 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 6028.0 + "value": 6424.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1837576.0 + "value": 1836960.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.217034693898057 + "value": 2.1811783595689165 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 50824.0 + "value": 52680.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102377.0 + "value": 2100895.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.86756301873132 + "value": 14.86593618208509 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -50485,7 +50485,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.17416611135391405 + "value": 0.1741458097014862 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -50515,13 +50515,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.55989876475215 + "value": 98.59520411907219 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95610568054649 + "value": 99.95840777188033 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -50581,7 +50581,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 45.97136285728396 + "value": 45.96481466681637 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -50593,13 +50593,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 59.47480540321403 + "value": 59.46650315508768 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.753795430985422 + "value": 7.752713057816606 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -50611,7 +50611,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 88.44344800426047 + "value": 88.43110194536203 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -50630,19 +50630,19 @@ "time" ], "times": { - "compilation_time": 24198.094, - "data": 78770.391, - "framework": 1196677.217, - "kernel_overhead": 512842.886, - "profiling_overhead": 65554.928, - "profiling_runs": 539509.012, + "compilation_time": 14451.748, + "data": 57410.566, + "framework": 1145370.005, + "kernel_overhead": 507356.419, + "profiling_overhead": 47246.96, + "profiling_runs": 533356.06, "runtimes": [ - 2791.648 + 2921.344 ], - "search_algorithm": 45.382, - "validation": 30.198 + "search_algorithm": 25.234, + "validation": 13.122 }, - "timestamp": "2026-03-05 09:00:41 UTC" + "timestamp": "2026-03-13 09:42:2 UTC" }, { "compilation_data": { @@ -50663,14 +50663,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -50678,49 +50678,49 @@ { "name": "time", "unit": "", - "value": 3516.288 + "value": 3611.36 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.178393894070034 + "value": 13.573401435541378 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10672.0 + "value": 10524.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1840604.0 + "value": 1841192.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8007282731522503 + "value": 1.7962005635465745 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 67680.0 + "value": 67629.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104923.0 + "value": 2105063.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.018119050830337 + "value": 6.017813733210885 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -50732,7 +50732,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07050194654910528 + "value": 0.07047766455097934 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -50762,13 +50762,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 97.87885903881602 + "value": 97.91542773767054 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.94660232742486 + "value": 99.92697183606303 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -50828,7 +50828,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.09399134711855 + "value": 42.08823580657388 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -50840,13 +50840,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 48.15504251612038 + "value": 48.14791387094246 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.233065598616481 + "value": 3.2325869908469667 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -50858,7 +50858,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 92.12552685022465 + "value": 92.11188902210881 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -50877,19 +50877,19 @@ "time" ], "times": { - "compilation_time": 24698.605, - "data": 80136.006, - "framework": 1879396.657, - "kernel_overhead": 851819.686, - "profiling_overhead": 65983.091, - "profiling_runs": 881457.874, + "compilation_time": 14000.68, + "data": 57196.096, + "framework": 1828222.161, + "kernel_overhead": 847292.162, + "profiling_overhead": 47079.024, + "profiling_runs": 876654.879, "runtimes": [ - 3516.288 + 3611.36 ], - "search_algorithm": 35.249, - "validation": 28.315 + "search_algorithm": 38.218, + "validation": 16.724 }, - "timestamp": "2026-03-05 09:00:42 UTC" + "timestamp": "2026-03-13 09:42:3 UTC" }, { "compilation_data": { @@ -50910,14 +50910,14 @@ "registers": 56 }, "configuration": { - "INNER_UNROLL_FACTOR": "4", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 4, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -50925,49 +50925,49 @@ { "name": "time", "unit": "", - "value": 5563.296 + "value": 5676.8 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 11.221908793564845 + "value": 8.569879143564238 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 460.0 + "value": 6608.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1835236.0 + "value": 1837268.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.1300387376468086 + "value": 1.1291042753164888 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 90916.0 + "value": 97320.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2099287.0 + "value": 2100996.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.8625685915885937 + "value": 1.8624342616460055 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -50979,7 +50979,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02181860098945793 + "value": 0.021817727798704188 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -51009,13 +51009,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 65.51030609794789 + "value": 65.5400353014946 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.93690497125672 + "value": 99.94063944106135 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -51075,7 +51075,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.34571709523101 + "value": 42.342481552246475 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -51087,13 +51087,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.80847088087678 + "value": 29.806164126787955 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.0588702424725516 + "value": 1.0587883008905388 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -51105,7 +51105,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 96.4505062399428 + "value": 96.44304233478216 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -51124,19 +51124,19 @@ "time" ], "times": { - "compilation_time": 23581.835, - "data": 75326.499, - "framework": 3354481.438, - "kernel_overhead": 1588474.409, - "profiling_overhead": 62603.051, - "profiling_runs": 1628077.479, + "compilation_time": 14567.808, + "data": 58130.116, + "framework": 3292156.156, + "kernel_overhead": 1573616.662, + "profiling_overhead": 47530.776, + "profiling_runs": 1612878.602, "runtimes": [ - 5563.296 + 5676.8 ], - "search_algorithm": 45.147, - "validation": 32.326 + "search_algorithm": 27.123, + "validation": 15.216 }, - "timestamp": "2026-03-05 09:00:44 UTC" + "timestamp": "2026-03-13 09:42:5 UTC" }, { "compilation_data": { @@ -51157,14 +51157,14 @@ "registers": 38 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "16" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 16 }, "correctness": 1, "invalidity": "correct", @@ -51172,49 +51172,49 @@ { "name": "time", "unit": "", - "value": 3350.72 + "value": 3676.288 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 18.988455957664733 + "value": 14.332805645433433 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 9892.0 + "value": 9048.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1841048.0 + "value": 1839384.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.8782185453861036 + "value": 1.856549209899996 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 64533.0 + "value": 64755.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2104827.0 + "value": 2101279.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.251894138926755 + "value": 6.25191366202367 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -51226,7 +51226,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.07321920996067269 + "value": 0.07322852143641607 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -51256,13 +51256,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 98.28363392705725 + "value": 98.24297297292934 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.90941576963502 + "value": 99.93176150499153 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -51322,7 +51322,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 42.951574176414084 + "value": 42.94624167489795 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -51334,13 +51334,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 50.029632957788465 + "value": 50.02480680922421 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 3.358923111179645 + "value": 3.35859908997477 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -51352,7 +51352,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 90.23981919665971 + "value": 90.23111414031398 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -51371,19 +51371,19 @@ "time" ], "times": { - "compilation_time": 24972.932, - "data": 77391.656, - "framework": 1747351.6519999998, - "kernel_overhead": 788741.945, - "profiling_overhead": 63335.835, - "profiling_runs": 817882.216, + "compilation_time": 14365.379, + "data": 58313.609, + "framework": 1703970.994, + "kernel_overhead": 784748.147, + "profiling_overhead": 47629.487, + "profiling_runs": 813279.751, "runtimes": [ - 3350.72 + 3676.288 ], - "search_algorithm": 54.468, - "validation": 26.877 + "search_algorithm": 24.754, + "validation": 11.979 }, - "timestamp": "2026-03-05 09:00:45 UTC" + "timestamp": "2026-03-13 09:42:6 UTC" }, { "compilation_data": { @@ -51404,14 +51404,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "8", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 8, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -51419,49 +51419,49 @@ { "name": "time", "unit": "", - "value": 6236.736 + "value": 6354.176 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.005895814408497 + "value": 7.621236147855375 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 644.0 + "value": 308.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1832244.0 + "value": 1835988.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0167806807657565 + "value": 1.0114844084563546 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 103356.0 + "value": 101498.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2102967.0 + "value": 2099181.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.6630653180086694 + "value": 1.663043900490872 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -51473,7 +51473,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.019484653742100056 + "value": 0.019483381522251438 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -51503,13 +51503,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.88728529120492 + "value": 81.88845506242292 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.95878281391381 + "value": 99.95783396994162 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -51569,7 +51569,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 43.31271424638746 + "value": 43.31035234973737 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -51581,13 +51581,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 26.614016791373185 + "value": 26.612531686490854 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9453953718615231 + "value": 0.9453426172813523 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -51599,7 +51599,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 94.42454571377702 + "value": 94.4192766724688 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -51618,19 +51618,19 @@ "time" ], "times": { - "compilation_time": 24979.427, - "data": 78284.69, - "framework": 3708605.554, - "kernel_overhead": 1761394.898, - "profiling_overhead": 64543.004, - "profiling_runs": 1804382.962, + "compilation_time": 14526.573, + "data": 58508.587, + "framework": 3665366.315, + "kernel_overhead": 1757915.241, + "profiling_overhead": 48496.154, + "profiling_runs": 1800446.333, "runtimes": [ - 6236.736 + 6354.176 ], - "search_algorithm": 50.199, - "validation": 29.194 + "search_algorithm": 35.71, + "validation": 14.574 }, - "timestamp": "2026-03-05 09:00:47 UTC" + "timestamp": "2026-03-13 09:42:8 UTC" }, { "compilation_data": { @@ -51651,14 +51651,14 @@ "registers": 48 }, "configuration": { - "INNER_UNROLL_FACTOR": "16", - "USE_CONSTANT_MEMORY": "0", - "USE_SOA": "1", - "VECTOR_SIZE": "1", - "WORK_GROUP_SIZE_X": "32", - "WORK_GROUP_SIZE_Y": "8", - "WORK_GROUP_SIZE_Z": "1", - "Z_ITERATIONS": "32" + "INNER_UNROLL_FACTOR": 16, + "USE_CONSTANT_MEMORY": 0, + "USE_SOA": 1, + "VECTOR_SIZE": 1, + "WORK_GROUP_SIZE_X": 32, + "WORK_GROUP_SIZE_Y": 8, + "WORK_GROUP_SIZE_Z": 1, + "Z_ITERATIONS": 32 }, "correctness": 1, "invalidity": "correct", @@ -51666,49 +51666,49 @@ { "name": "time", "unit": "", - "value": 5932.8 + "value": 6112.544 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 10.62023681160696 + "value": 8.027909765336084 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 16728.0 + "value": 508.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 1836556.0 + "value": 1833380.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.0755796159854507 + "value": 1.0621239917350704 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 116918.0 + "value": 97334.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 2109085.0 + "value": 2099183.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 1.7439376209537754 + "value": 1.743817551532071 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -51720,7 +51720,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 0.02043118649027698 + "value": 0.02042898338076932 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -51750,13 +51750,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 81.85709614989365 + "value": 81.84946052250713 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 99.96637055468311 + "value": 99.95744980086064 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -51816,7 +51816,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 44.10535746174518 + "value": 44.104512797632204 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -51828,13 +51828,13 @@ "name": "smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 27.90476417213291 + "value": 27.904245287815343 }, { "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 0.9912458952747409 + "value": 0.991227463226839 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -51846,7 +51846,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 95.07991291944529 + "value": 95.078144197065 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -51865,19 +51865,19 @@ "time" ], "times": { - "compilation_time": 24780.61, - "data": 77758.398, - "framework": 3620788.844, - "kernel_overhead": 1718387.76, - "profiling_overhead": 64627.364, - "profiling_runs": 1760015.322, + "compilation_time": 17598.645, + "data": 58050.272, + "framework": 3573123.104, + "kernel_overhead": 1712983.429, + "profiling_overhead": 48052.906, + "profiling_runs": 1754036.497, "runtimes": [ - 5932.8 + 6112.544 ], - "search_algorithm": 25.304, - "validation": 27.046 + "search_algorithm": 13.239, + "validation": 22.74 }, - "timestamp": "2026-03-05 09:00:49 UTC" + "timestamp": "2026-03-13 09:42:10 UTC" } ], "schema_version": "1.0.0" diff --git a/Source/Output/JsonT4Converters.cpp b/Source/Output/JsonT4Converters.cpp index d98f5a5b..f77c6a94 100644 --- a/Source/Output/JsonT4Converters.cpp +++ b/Source/Output/JsonT4Converters.cpp @@ -11,30 +11,27 @@ void to_json(json& j, const as_T4& configuration) j = json::object(); const std::vector& pairs = configuration.v.GetPairs(); for (const auto& pair : pairs) { - std::string value; switch (pair.GetValueType()) { case ParameterValueType::Int: - value = std::to_string(std::get(pair.GetValue())); + j[pair.GetName()] = std::get(pair.GetValue()); break; case ParameterValueType::UnsignedInt: - value = std::to_string(pair.GetValueUint()); + j[pair.GetName()] = pair.GetValueUint(); break; case ParameterValueType::Double: - value = std::to_string(std::get(pair.GetValue())); + j[pair.GetName()] = std::get(pair.GetValue()); break; case ParameterValueType::Bool: - value = std::to_string(std::get(pair.GetValue())); + j[pair.GetName()] = std::get(pair.GetValue()); break; case ParameterValueType::String: - value = pair.GetValueString(); + j[pair.GetName()] = pair.GetValueString(); break; default: KttError("Unhandled parameter value type"); } - - j[pair.GetName()] = value; } } @@ -42,36 +39,34 @@ void from_json(const json& j, as_T4& configuration) { std::vector pairs; for (auto it = j.begin(); it != j.end(); ++it) { - ParameterPair pair; std::string name = it.key(); - std::string valueStr; - - try { - valueStr = it.value().get(); - if (valueStr == "true" || valueStr == "false") { - pair = ParameterPair(name, valueStr == "true"); - } - // detect floating-point numbers (presence of '.' or exponent) - else if (valueStr.find('.') != std::string::npos || - valueStr.find('e') != std::string::npos || - valueStr.find('E') != std::string::npos) { - pair = ParameterPair(name, std::stod(valueStr)); - } - // detect unsigned integers - else if (!valueStr.empty() && valueStr.find_first_not_of("0123456789") == std::string::npos) { - pair = ParameterPair(name, static_cast(std::stoull(valueStr))); - } - // fallback: signed integer - else { - pair = ParameterPair(name, static_cast(std::stoll(valueStr))); - } - } catch (const std::invalid_argument&) { - pair = ParameterPair(name, valueStr); - } catch (const std::out_of_range&) { - pair = ParameterPair(name, valueStr); + const auto &jsonValue = it.value(); + + ParameterPair pair; + + if (jsonValue.is_boolean()) + { + pair = ParameterPair(name, jsonValue.get()); + } + else if (jsonValue.is_number_float()) + { + pair = ParameterPair(name, jsonValue.get()); } - catch (const nlohmann::json::type_error& e) { - KttError("JSON type error while parsing"); + else if (jsonValue.is_number_unsigned()) + { + pair = ParameterPair(name, jsonValue.get()); + } + else if (jsonValue.is_number_integer()) + { + pair = ParameterPair(name, jsonValue.get()); + } + else if (jsonValue.is_string()) + { + pair = ParameterPair(name, jsonValue.get()); + } + else + { + KttError("Unsupported parameter value type in configuration"); } pairs.push_back(pair); } diff --git a/Tutorials/03KernelTuning/FullSearchSpace.t4.json b/Tutorials/03KernelTuning/FullSearchSpace.t4.json index 32d2c2b9..23c0d7b8 100644 --- a/Tutorials/03KernelTuning/FullSearchSpace.t4.json +++ b/Tutorials/03KernelTuning/FullSearchSpace.t4.json @@ -5,7 +5,7 @@ "compute_api": "CUDA", "device": "NVIDIA RTX 500 Ada Generation Laptop GPU", "platform": "NVIDIA CUDA", - "timestamp": "2026-03-05 09:04:43 UTC", + "timestamp": "2026-03-13 09:44:4 UTC", "timeunit": "microseconds" }, "results": [ @@ -28,7 +28,7 @@ "registers": 12 }, "configuration": { - "multiply_block_size": "32" + "multiply_block_size": 32 }, "correctness": 1, "invalidity": "correct", @@ -36,49 +36,49 @@ { "name": "time", "unit": "", - "value": 90.656 + "value": 89.44 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 26.710382938978828 + "value": 19.12216730954677 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 4828.0 + "value": 1696.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 50080.0 + "value": 49068.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 19.857135397000107 + "value": 20.021818505655002 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 263456.0 + "value": 263544.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 131870.0 + "value": 132012.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.277755426557081 + "value": 6.427757366282913 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -90,7 +90,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 1.4745269705958586 + "value": 1.4854734765374251 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -120,13 +120,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 24.890646652745048 + "value": 25.2548705608268 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 40.744663569536044 + "value": 40.84136332809888 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -186,7 +186,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.142985215480907 + "value": 8.183513926905666 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -204,7 +204,7 @@ "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.237890027386714 + "value": 7.274357932686437 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -216,7 +216,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.632338798988025 + "value": 13.701274699806198 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -235,19 +235,19 @@ "time" ], "times": { - "compilation_time": 16215.021, - "data": 10669.496, - "framework": 48014.751000000004, - "kernel_overhead": 9367.319, - "profiling_overhead": 4977.19, - "profiling_runs": 23000.746, + "compilation_time": 15061.987, + "data": 6837.891, + "framework": 40386.628, + "kernel_overhead": 8584.186, + "profiling_overhead": 3889.532, + "profiling_runs": 21075.019, "runtimes": [ - 90.656 + 89.44 ], - "search_algorithm": 17.65, - "validation": 10899.337 + "search_algorithm": 12.432, + "validation": 9727.423 }, - "timestamp": "2026-03-05 09:04:43 UTC" + "timestamp": "2026-03-13 09:44:4 UTC" }, { "compilation_data": { @@ -268,7 +268,7 @@ "registers": 12 }, "configuration": { - "multiply_block_size": "64" + "multiply_block_size": 64 }, "correctness": 1, "invalidity": "correct", @@ -276,49 +276,49 @@ { "name": "time", "unit": "", - "value": 62.816 + "value": 54.336 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.741541353383454 + "value": 40.05379593810445 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 7328.0 + "value": 3576.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 43480.0 + "value": 49436.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 39.85658224775467 + "value": 40.105867823656546 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 262729.0 + "value": 262809.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 131205.0 + "value": 131947.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 6.859707591566043 + "value": 6.901737439945743 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -330,7 +330,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 2.969729925684249 + "value": 2.97997104422667 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -360,13 +360,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 29.580783726337412 + "value": 29.65983910408843 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 81.44435381547943 + "value": 81.35844773773927 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -426,7 +426,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 8.204882396732012 + "value": 8.242345387559466 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -444,7 +444,7 @@ "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 7.292660047159261 + "value": 7.325535644024753 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -456,7 +456,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.778449397267256 + "value": 13.839641011195766 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -475,19 +475,19 @@ "time" ], "times": { - "compilation_time": 14667.761, - "data": 6443.632, - "framework": 43979.096999999994, - "kernel_overhead": 9226.711, - "profiling_overhead": 5352.126, - "profiling_runs": 22956.628, + "compilation_time": 12324.712, + "data": 4618.838, + "framework": 38430.715, + "kernel_overhead": 8863.657, + "profiling_overhead": 3822.415, + "profiling_runs": 21125.805, "runtimes": [ - 62.816 + 54.336 ], - "search_algorithm": 22.325, - "validation": 12768.84 + "search_algorithm": 8.477, + "validation": 9358.694 }, - "timestamp": "2026-03-05 09:04:43 UTC" + "timestamp": "2026-03-13 09:44:4 UTC" }, { "compilation_data": { @@ -508,7 +508,7 @@ "registers": 12 }, "configuration": { - "multiply_block_size": "128" + "multiply_block_size": 128 }, "correctness": 1, "invalidity": "correct", @@ -516,49 +516,49 @@ { "name": "time", "unit": "", - "value": 64.127 + "value": 44.256 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 83.6156952247191 + "value": 71.57711330935251 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 25264.0 + "value": 9788.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 50940.0 + "value": 41152.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 44.82256259553097 + "value": 74.65849479972103 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 262635.0 + "value": 262462.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 131352.0 + "value": 131348.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 12.684698076468973 + "value": 13.352566766908714 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -570,7 +570,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 3.3374888981234774 + "value": 5.5507372072853425 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -600,13 +600,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 62.81015276503553 + "value": 62.16194625776604 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 49.804775029129694 + "value": 82.55620527970513 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -666,7 +666,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 15.080535694657113 + "value": 15.128991154364646 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -684,7 +684,7 @@ "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.402284805709714 + "value": 13.447171387006293 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -696,7 +696,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.374329295258352 + "value": 25.45905584999969 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -715,19 +715,19 @@ "time" ], "times": { - "compilation_time": 13879.778, - "data": 6938.962, - "framework": 44236.454, - "kernel_overhead": 8881.219, - "profiling_overhead": 5568.067, - "profiling_runs": 22848.206, + "compilation_time": 12721.594, + "data": 4697.985, + "framework": 38527.655, + "kernel_overhead": 8903.951, + "profiling_overhead": 3881.076, + "profiling_runs": 21044.643, "runtimes": [ - 64.127 + 44.256 ], - "search_algorithm": 21.857, - "validation": 12616.121 + "search_algorithm": 9.675, + "validation": 9290.56 }, - "timestamp": "2026-03-05 09:04:43 UTC" + "timestamp": "2026-03-13 09:44:4 UTC" }, { "compilation_data": { @@ -748,7 +748,7 @@ "registers": 12 }, "configuration": { - "multiply_block_size": "256" + "multiply_block_size": 256 }, "correctness": 1, "invalidity": "correct", @@ -756,49 +756,49 @@ { "name": "time", "unit": "", - "value": 69.728 + "value": 44.576 }, { "name": "dram__throughput.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 80.48551502145924 + "value": 72.94351374570446 }, { "name": "dram__sectors_read.sum", "type": "Double", "unit": "", - "value": 10328.0 + "value": 10632.0 }, { "name": "dram__sectors_write.sum", "type": "Double", "unit": "", - "value": 37680.0 + "value": 43708.0 }, { "name": "lts__t_sectors.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 68.16769541629762 + "value": 71.22926120761916 }, { "name": "lts__t_sectors_op_read.sum", "type": "Double", "unit": "", - "value": 262508.0 + "value": 262455.0 }, { "name": "lts__t_sectors_op_write.sum", "type": "Double", "unit": "", - "value": 131398.0 + "value": 131700.0 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.075452800600143 + "value": 13.16272035445509 }, { "name": "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum", @@ -810,7 +810,7 @@ "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 5.073953868642073 + "value": 5.2921264115351825 }, { "name": "l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", @@ -840,13 +840,13 @@ "name": "sm__warps_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 63.04814826268487 + "value": 63.26284267038368 }, { "name": "smsp__cycles_active.avg.pct_of_peak_sustained_elapsed", "type": "Double", "unit": "", - "value": 76.6576908307113 + "value": 79.48432937543606 }, { "name": "smsp__sass_thread_inst_executed_op_fp16_pred_on.sum", @@ -906,7 +906,7 @@ "name": "smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 14.897442034979491 + "value": 14.978992866565715 }, { "name": "smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active", @@ -924,7 +924,7 @@ "name": "smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 13.237951244441348 + "value": 13.316150373587144 }, { "name": "smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active", @@ -936,7 +936,7 @@ "name": "smsp__issue_active.avg.pct_of_peak_sustained_active", "type": "Double", "unit": "", - "value": 25.077793365066764 + "value": 25.225932723328103 }, { "name": "smsp__thread_inst_executed_per_inst_executed.ratio", @@ -955,19 +955,19 @@ "time" ], "times": { - "compilation_time": 14439.507, - "data": 6548.418, - "framework": 43838.988, - "kernel_overhead": 9133.236, - "profiling_overhead": 5376.813, - "profiling_runs": 22780.521, + "compilation_time": 12673.4, + "data": 4649.726, + "framework": 37830.429, + "kernel_overhead": 8500.152, + "profiling_overhead": 3821.577, + "profiling_runs": 20858.974, "runtimes": [ - 69.728 + 44.576 ], - "search_algorithm": 6.933, - "validation": 14969.414 + "search_algorithm": 3.471, + "validation": 9406.798 }, - "timestamp": "2026-03-05 09:04:43 UTC" + "timestamp": "2026-03-13 09:44:4 UTC" } ], "schema_version": "1.0.0"