From 8acc35db984f534bfab893f75a3a7d7887c5a410 Mon Sep 17 00:00:00 2001 From: Qi Bin Lei Date: Sun, 3 May 2026 18:02:23 -0400 Subject: [PATCH 01/18] Parser + Arrow Updates --- EVNT/BNL/CentOS7/centos_cron.sh | 2 +- EVNT/BNL/CentOS7/evnt_centos.sub | 3 +- EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh | 18 ++- EVNT/BNL/EL9/el_cron.sh | 2 +- EVNT/BNL/EL9/evnt_el.sub | 3 +- EVNT/BNL/EL9/run_evnt_el9_batch.sh | 19 +++- EVNT/BNL/Native/evnt_native.sub | 3 +- EVNT/BNL/Native/native_cron.sh | 2 +- EVNT/BNL/Native/run_evnt_native_batch.sh | 20 +++- EVNT/UC/CentOS7/run_evnt_centos7_batch.sh | 17 ++- EVNT/UC/EL9/run_evnt_el9_batch.sh | 16 ++- EVNT/UC/Native/run_evnt_native_batch.sh | 14 ++- NTuple_Hist/coffea/BNL/coffea_el9.sub | 3 +- NTuple_Hist/coffea/BNL/cron_example.sh | 2 +- NTuple_Hist/coffea/BNL/run_example.sh | 18 ++- NTuple_Hist/coffea/UC/run_example.sh | 13 ++- .../BNL/columnar/cron_eventloop_arrays.sh | 2 +- .../BNL/columnar/event_loop_arrays.py | 106 ------------------ .../BNL/columnar/eventloop_arrays.sub | 2 +- .../BNL/columnar/run_eventloop_arrays.sh | 28 ++++- .../BNL/standard/cron_eventloop_noarrays.sh | 2 +- .../event_loop/BNL/standard/event_loop.sub | 12 -- .../BNL/standard/event_loop_noarrays.py | 91 --------------- .../BNL/standard/eventloop_noarrays.sub | 2 +- .../BNL/standard/run_eventloop_noarrays.sh | 17 ++- .../UC/columnar/run_eventloop_arrays.sh | 12 +- .../UC/standard/run_eventloop_noarrays.sh | 14 ++- .../fastframes/BNL/crontab_fastframes.sh | 2 +- NTuple_Hist/fastframes/BNL/fastframes_el9.sub | 3 +- NTuple_Hist/fastframes/BNL/run_fastframes.sh | 17 ++- NTuple_Hist/fastframes/UC/run_fastframes.sh | 13 ++- Rucio/cron_rucio_bnl.sh | 2 +- Rucio/rucio_el.sub | 2 +- Rucio/rucio_script.sh | 15 +++ TRUTH3/BNL/CentOS7/cron_centos_batch.sh | 2 +- .../BNL/CentOS7/run_truth3_centos7_batch.sh | 18 ++- TRUTH3/BNL/CentOS7/truth3_centos.sub | 2 +- TRUTH3/BNL/EL9/cron_el_batch.sh | 2 +- TRUTH3/BNL/EL9/run_truth3_el9_batch.sh | 19 +++- TRUTH3/BNL/EL9/truth3_el.sub | 3 
+- TRUTH3/BNL/Native/cron_native_batch.sh | 2 +- TRUTH3/BNL/Native/run_truth3_native_batch.sh | 19 +++- TRUTH3/BNL/Native/truth3_native.sub | 3 +- TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh | 13 ++- TRUTH3/UC/EL9/run_truth3_el9_batch.sh | 14 ++- TRUTH3/UC/Native/run_truth3_native_batch.sh | 12 +- parsing/handlers/base_parser.py | 98 +++++++--------- parsing/scripts/ci_parse.py | 27 +---- 48 files changed, 355 insertions(+), 376 deletions(-) delete mode 100644 NTuple_Hist/event_loop/BNL/columnar/event_loop_arrays.py delete mode 100644 NTuple_Hist/event_loop/BNL/standard/event_loop.sub delete mode 100644 NTuple_Hist/event_loop/BNL/standard/event_loop_noarrays.py diff --git a/EVNT/BNL/CentOS7/centos_cron.sh b/EVNT/BNL/CentOS7/centos_cron.sh index f22bbbc5..96f48726 100644 --- a/EVNT/BNL/CentOS7/centos_cron.sh +++ b/EVNT/BNL/CentOS7/centos_cron.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_centos7_batch" readonly log_output="log.generate" readonly job_dir="/usatlas/u/qlei/test/EVNT/centos" -readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" readonly sub_file="${AF_BENCH_DIR}/EVNT/BNL/CentOS7/evnt_centos.sub" readonly pixi_job="evnt" diff --git a/EVNT/BNL/CentOS7/evnt_centos.sub b/EVNT/BNL/CentOS7/evnt_centos.sub index c55ef75f..c38e866c 100644 --- a/EVNT/BNL/CentOS7/evnt_centos.sub +++ b/EVNT/BNL/CentOS7/evnt_centos.sub @@ -1,11 +1,10 @@ Universe = vanilla - Output = /usatlas/u/qlei/batch_output_files/evnt/centos/myjob.$(Cluster).$(Process).out Error = /usatlas/u/qlei/batch_output_files/evnt/centos/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/evnt/centos/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/AF-Benchmarking/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh +Executable = /usatlas/u/qlei/dev/af-benchmarking/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh request_memory = 3GB request_cpus = 1 diff --git 
a/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh b/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh index 5a8418df..f3409969 100755 --- a/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh +++ b/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh @@ -1,8 +1,10 @@ #!/bin/bash -# Current time used for log file storage -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh +# Current time used for log file storage +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) # Sets up the container: ## -c : used to make a container followed by the OS we want to use @@ -12,17 +14,25 @@ export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh -c centos7 -m /atlasgpfs01 -r "asetup AthGeneration,23.6.31,here && export LHAPATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current && export LHAPDF_DATA_PATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current &&\ echo $(date -u "+%Y-%m-%dT%H:%M:%SZ") >> split.log &&\ - Gen_tf.py --ecmEnergy=13000.0 --jobConfig=/atlasgpfs01/usatlas/data/qlei/EVNTJob/100xxx/100001/ --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log &&\ + /usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=/atlasgpfs01/usatlas/data/qlei/EVNTJob/100xxx/100001/ --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log &&\ + cat pipe_file.log >> log.generate &&\ echo $(date -u "+%Y-%m-%dT%H:%M:%SZ") >> split.log" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") 
+end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) + # Output directory -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_centos7_batch/${curr_time}" +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_centos7_batch/${start_time}" # Creates the output directory mkdir -p "${output_dir}" # Obtains and appends the host name and payload size to the log file hostname >> split.log du EVNT.root >> split.log + +append_benchmark log.generate "${start_time}" "${wall_time}" "${end_time}" "time_v" + # Moves the log file to the output directory mv log.generate "${output_dir}" mv split.log "${output_dir}" diff --git a/EVNT/BNL/EL9/el_cron.sh b/EVNT/BNL/EL9/el_cron.sh index e372afb5..a8856a55 100644 --- a/EVNT/BNL/EL9/el_cron.sh +++ b/EVNT/BNL/EL9/el_cron.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_el9_batch" readonly log_output="log.generate" readonly job_dir="/usatlas/u/qlei/test/EVNT/el" -readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" readonly sub_file="${AF_BENCH_DIR}/EVNT/BNL/EL9/evnt_el.sub" readonly pixi_job="evnt" diff --git a/EVNT/BNL/EL9/evnt_el.sub b/EVNT/BNL/EL9/evnt_el.sub index af007825..5615da6d 100644 --- a/EVNT/BNL/EL9/evnt_el.sub +++ b/EVNT/BNL/EL9/evnt_el.sub @@ -1,11 +1,10 @@ Universe = vanilla - Output = /usatlas/u/qlei/batch_output_files/evnt/el/myjob.$(Cluster).$(Process).out Error = /usatlas/u/qlei/batch_output_files/evnt/el/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/evnt/el/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/AF-Benchmarking/EVNT/BNL/EL9/run_evnt_el9_batch.sh +Executable = /usatlas/u/qlei/dev/af-benchmarking/EVNT/BNL/EL9/run_evnt_el9_batch.sh request_memory = 3GB request_cpus = 1 diff --git a/EVNT/BNL/EL9/run_evnt_el9_batch.sh b/EVNT/BNL/EL9/run_evnt_el9_batch.sh index 8f7c5ffd..55eebd73 100755 --- a/EVNT/BNL/EL9/run_evnt_el9_batch.sh +++ b/EVNT/BNL/EL9/run_evnt_el9_batch.sh 
@@ -1,7 +1,10 @@ #!/bin/bash -# Current time used for log file storage -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh + +# current time used for log file storage +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) # The OS used in the container OScontainer="el9" @@ -14,17 +17,25 @@ export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh -c ${OScontainer} -m /atlasgpfs01 -r "asetup AthGeneration,23.6.34,here &&\ echo $(date -u "+%Y-%m-%dT%H:%M:%SZ") >> split.log &&\ - Gen_tf.py --ecmEnergy=13000.0 --jobConfig=/atlasgpfs01/usatlas/data/qlei/EVNTJob/100xxx/100001/ --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log &&\ + /usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=/atlasgpfs01/usatlas/data/qlei/EVNTJob/100xxx/100001/ --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log &&\ + cat pipe_file.log >> log.generate &&\ echo $(date -u "+%Y-%m-%dT%H:%M:%SZ") >> split.log" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) + # Output directory -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_el9_batch/${curr_time}" +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_el9_batch/${start_time}" # Creates the output directory mkdir -p "${output_dir}" # Obtains and appends the host name and payload size to the log file hostname >> split.log du EVNT.root >> split.log + +append_benchmark log.generate "${start_time}" "${wall_time}" "${end_time}" "time_v" + # Moves the log file to the output directory mv log.generate "${output_dir}" mv split.log "${output_dir}" diff --git a/EVNT/BNL/Native/evnt_native.sub b/EVNT/BNL/Native/evnt_native.sub index 2a2a3251..10084a33 100644 --- a/EVNT/BNL/Native/evnt_native.sub +++ b/EVNT/BNL/Native/evnt_native.sub @@ 
-1,11 +1,10 @@ Universe = vanilla - Output = /usatlas/u/qlei/batch_output_files/evnt/native/myjob.$(Cluster).$(Process).out Error = /usatlas/u/qlei/batch_output_files/evnt/native/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/evnt/native/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/AF-Benchmarking/EVNT/BNL/Native/run_evnt_native_batch.sh +Executable = /usatlas/u/qlei/dev/af-benchmarking/EVNT/BNL/Native/run_evnt_native_batch.sh request_memory = 3GB request_cpus = 1 diff --git a/EVNT/BNL/Native/native_cron.sh b/EVNT/BNL/Native/native_cron.sh index 49931c4f..62116147 100644 --- a/EVNT/BNL/Native/native_cron.sh +++ b/EVNT/BNL/Native/native_cron.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_native_batch" readonly log_output="log.generate" readonly job_dir="/usatlas/u/qlei/test/EVNT/native" -readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" readonly sub_file="${AF_BENCH_DIR}/EVNT/BNL/Native/evnt_native.sub" readonly pixi_job="evnt" diff --git a/EVNT/BNL/Native/run_evnt_native_batch.sh b/EVNT/BNL/Native/run_evnt_native_batch.sh index 02ed1d54..58349a1b 100755 --- a/EVNT/BNL/Native/run_evnt_native_batch.sh +++ b/EVNT/BNL/Native/run_evnt_native_batch.sh @@ -1,9 +1,14 @@ #!/bin/bash +source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh + +# current time used for log file storage +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) + # The seed used in the job seed=1001 - # Sets up our working environment export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 @@ -15,22 +20,27 @@ asetup AthGeneration,23.6.34,here # Appends time before Gen_tf.py to log file date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log -Gen_tf.py --ecmEnergy=13000.0 --jobConfig=/atlasgpfs01/usatlas/data/qlei/EVNTJob/100xxx/100001/ --outputEVNTFile=EVNT.root --maxEvents=1000 
--randomSeed=${seed} 2>&1 | tee pipe_file.log +/usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=/atlasgpfs01/usatlas/data/qlei/EVNTJob/100xxx/100001/ --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=${seed} 2>&1 | tee pipe_file.log +cat pipe_file.log >> log.generate # Appends time after Gen_tf.py to log file date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log -# Current time used for log file storage -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) # Output directory -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_native_batch/${curr_time}" +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_native_batch/${start_time}" # Creates the output directory mkdir -p "${output_dir}" # Obtains and appends the host name and payload size to the log file hostname >> split.log du EVNT.root >> split.log + +append_benchmark log.generate "${start_time}" "${wall_time}" "${end_time}" "time_v" + # Moves the log file to the output directory mv log.generate "${output_dir}" mv split.log "${output_dir}" diff --git a/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh b/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh index 4b1b3138..3fa8a9f9 100755 --- a/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh +++ b/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh @@ -1,5 +1,7 @@ #!/bin/bash +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" + # shellcheck disable=SC2034 OS_container="centos7" @@ -7,6 +9,9 @@ OS_container="centos7" # shellcheck disable=SC2034 seed=1001 +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) + # Directory storing the input files config_dir="${GITHUB_WORKSPACE}/EVNT/EVNTFiles/100xxx/100001" @@ -22,7 +27,15 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 -source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup 
AthGeneration,23.6.31,here && export LHAPATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current && export LHAPDF_DATA_PATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current && Gen_tf.py --ecmEnergy=13000.0 --jobConfig=${config_dir} --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log" +source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup AthGeneration,23.6.31,here &&\ + export LHAPATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current &&\ + export LHAPDF_DATA_PATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current &&\ + /usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=${config_dir} --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log &&\ + cat pipe_file.log >> log.generate" + +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) # Appends time after Gen_tf.py to a log file { @@ -30,3 +43,5 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup hostname du EVNT.root } >> split.log + +append_benchmark "log.generate" "${start_time}" "${wall_time}" "${end_time}" "time_v" diff --git 
a/EVNT/UC/EL9/run_evnt_el9_batch.sh b/EVNT/UC/EL9/run_evnt_el9_batch.sh index b45efe93..8a758342 100755 --- a/EVNT/UC/EL9/run_evnt_el9_batch.sh +++ b/EVNT/UC/EL9/run_evnt_el9_batch.sh @@ -1,11 +1,16 @@ #!/bin/bash +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" + # Defining the OS wanted in the container OS_container="el9" # The seed used in the job seed=1001 +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) + # Directory storing the input files config_dir="${GITHUB_WORKSPACE}/EVNT/EVNTFiles/100xxx/100001" @@ -21,11 +26,18 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c "${OS_container}" -r "asetup AthGeneration,23.6.34,here && \ -Gen_tf.py --ecmEnergy=13000.0 --jobConfig=${config_dir} --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=${seed} 2>&1 | tee pipe_file.log" +/usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=${config_dir} --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=${seed} 2>&1 | tee pipe_file.log && \ +cat pipe_file.log >> log.generate" + +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) # Appends time after Gen_tf.py to a log file { date -u "+%Y-%m-%dT%H:%M:%SZ" hostname du EVNT.root -} >> split.logi +} >> split.log + +append_benchmark "log.generate" "${start_time}" "${wall_time}" "${end_time}" "time_v" diff --git a/EVNT/UC/Native/run_evnt_native_batch.sh b/EVNT/UC/Native/run_evnt_native_batch.sh index 1637a223..23cc2275 100755 --- a/EVNT/UC/Native/run_evnt_native_batch.sh +++ b/EVNT/UC/Native/run_evnt_native_batch.sh @@ -1,8 +1,13 @@ #!/bin/bash +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" + # The seed used in the job seed=1001 +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) + # Directory storing the input files 
config_dir="${GITHUB_WORKSPACE}/EVNT/EVNTFiles/100xxx/100001" @@ -22,9 +27,14 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log asetup AthGeneration,23.6.34,here echo "::group::EVNT Generation" -Gen_tf.py --ecmEnergy=13000.0 --jobConfig="${config_dir}" --outputEVNTFile=EVNT.root --maxEvents="${max_events}" --randomSeed="${seed}" 2>&1 | tee pipe_file.log +/usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig="${config_dir}" --outputEVNTFile=EVNT.root --maxEvents="${max_events}" --randomSeed="${seed}" 2>&1 | tee pipe_file.log +cat pipe_file.log >> log.generate echo "::endgroup::" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) + # Appends time after Gen_tf.py to a log file echo "::group::Collect Metrics" { @@ -33,3 +43,5 @@ echo "::group::Collect Metrics" du EVNT.root } >> split.log echo "::endgroup::" + +append_benchmark "log.generate" "${start_time}" "${wall_time}" "${end_time}" "time_v" diff --git a/NTuple_Hist/coffea/BNL/coffea_el9.sub b/NTuple_Hist/coffea/BNL/coffea_el9.sub index 70b584da..7ee91ec5 100755 --- a/NTuple_Hist/coffea/BNL/coffea_el9.sub +++ b/NTuple_Hist/coffea/BNL/coffea_el9.sub @@ -1,11 +1,10 @@ Universe = vanilla - Output = /usatlas/u/qlei/batch_output_files/coffea/myjob.$(Cluster).$(Process).out Error = /usatlas/u/qlei/batch_output_files/coffea/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/coffea/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/AF-Benchmarking/NTuple_Hist/coffea/BNL/run_example.sh +Executable = /usatlas/u/qlei/dev/af-benchmarking/NTuple_Hist/coffea/BNL/run_example.sh request_memory = 16GB request_cpus = 4 diff --git a/NTuple_Hist/coffea/BNL/cron_example.sh b/NTuple_Hist/coffea/BNL/cron_example.sh index 8670c581..2807237d 100644 --- a/NTuple_Hist/coffea/BNL/cron_example.sh +++ b/NTuple_Hist/coffea/BNL/cron_example.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/Coffea_Hist" readonly 
log_output="coffea_hist.log" readonly job_dir="/usatlas/u/qlei/test/coffea" -readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" readonly sub_file="${AF_BENCH_DIR}/NTuple_Hist/coffea/BNL/coffea_el9.sub" readonly pixi_job="coffea" diff --git a/NTuple_Hist/coffea/BNL/run_example.sh b/NTuple_Hist/coffea/BNL/run_example.sh index fd8fd369..d98d9e85 100755 --- a/NTuple_Hist/coffea/BNL/run_example.sh +++ b/NTuple_Hist/coffea/BNL/run_example.sh @@ -1,7 +1,11 @@ #!/bin/bash -# Gets the current time -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh + +# current time used for log file storage + +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) working_dir="/atlasgpfs01/usatlas/data/qlei/ntuple/coffea" @@ -22,7 +26,11 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -m /atlasgpfs01 python3 -m venv venv &&\ ./venv/bin/python -m pip install -U pip &&\ ./venv/bin/python -m pip install atlas_schema 'dask_awkward!=2026.2.0' &&\ - ./venv/bin/python example.py 2>&1 | tee coffea_hist.log" + /usr/bin/time -v ./venv/bin/python example.py 2>&1 | tee coffea_hist.log" + +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) { date -u "+%Y-%m-%dT%H:%M:%SZ" @@ -30,9 +38,11 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -m /atlasgpfs01 du coffea.root } >> split.log -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/Coffea_Hist/${curr_time}" +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/Coffea_Hist/${start_time}" mkdir -p "${output_dir}" +append_benchmark coffea_hist.log "${start_time}" "${wall_time}" "${end_time}" "time_v" + mv coffea_hist.log "${output_dir}" mv split.log "${output_dir}" diff --git a/NTuple_Hist/coffea/UC/run_example.sh b/NTuple_Hist/coffea/UC/run_example.sh index 91640ed3..c339c008 100755 --- 
a/NTuple_Hist/coffea/UC/run_example.sh +++ b/NTuple_Hist/coffea/UC/run_example.sh @@ -1,7 +1,12 @@ #!/bin/bash +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" + date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) + #cp ${GITHUB_WORKSPACE}/NTuple_Hist/coffea/UC/example.py . # Setting up environment and container @@ -12,7 +17,11 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -m /data -r "ls python3 -m venv venv &&\ ./venv/bin/python -m pip install -U pip &&\ ./venv/bin/python -m pip install atlas_schema 'dask_awkward!=2026.2.0' &&\ - ./venv/bin/python ${GITHUB_WORKSPACE}/NTuple_Hist/coffea/UC/example.py 2>&1 | tee coffea_hist.log" + /usr/bin/time -v ./venv/bin/python ${GITHUB_WORKSPACE}/NTuple_Hist/coffea/UC/example.py 2>&1 | tee coffea_hist.log" + +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) echo "::group::Collect Metrics" { @@ -20,3 +29,5 @@ echo "::group::Collect Metrics" hostname } >> split.log echo "::endgroup::" + +append_benchmark "coffea_hist.log" "${start_time}" "${wall_time}" "${end_time}" "time_v" diff --git a/NTuple_Hist/event_loop/BNL/columnar/cron_eventloop_arrays.sh b/NTuple_Hist/event_loop/BNL/columnar/cron_eventloop_arrays.sh index d5e29cd0..9e5fa2a3 100644 --- a/NTuple_Hist/event_loop/BNL/columnar/cron_eventloop_arrays.sh +++ b/NTuple_Hist/event_loop/BNL/columnar/cron_eventloop_arrays.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_arrays" readonly log_output="eventloop_arrays.log" readonly job_dir="/usatlas/u/qlei/test/eventloop_arrays" -readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" readonly sub_file="${AF_BENCH_DIR}/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.sub" readonly pixi_job="eventloop-columnar" diff --git a/NTuple_Hist/event_loop/BNL/columnar/event_loop_arrays.py 
b/NTuple_Hist/event_loop/BNL/columnar/event_loop_arrays.py deleted file mode 100644 index 43975d04..00000000 --- a/NTuple_Hist/event_loop/BNL/columnar/event_loop_arrays.py +++ /dev/null @@ -1,106 +0,0 @@ -import ROOT as r -from pathlib import Path -from array import array - - -ph_pt_array = r.std.vector("float")() -ph_select_tightID_array = r.std.vector("char")() -weight_mc_array = array("f", [0.0]) -weight_pileup_array = array("f", [0.0]) - - -def photon_eventloop(f_name, h_baseline_pt, metadata): - """Process one ROOT file and fill the given histogram.""" - fp = r.TFile.Open(f_name, "READ") - if not fp or fp.IsZombie(): - print(f"Could not open file: {f_name}") - return - - tree = fp.Get("analysis") - if not tree: - print(f"No tree named 'analysis' in {f_name}") - fp.Close() - return - - tree.SetBranchAddress("ph_pt_NOSYS", ph_pt_array) - tree.SetBranchAddress("ph_select_tightID_NOSYS", ph_select_tightID_array) - tree.SetBranchAddress("weight_mc_NOSYS", weight_mc_array) - tree.SetBranchAddress("weight_pileup_NOSYS", weight_pileup_array) - - totalevents = tree.GetEntriesFast() - print(f" {f_name} has {totalevents} events") - - # Normalization factors - xs = metadata["xs"] - lum = metadata["luminosity"] - genFiltEff = metadata["genFiltEff"] - kfactor = metadata.get("kfactor", 1.0) - sumOfWeights = metadata["sum_of_weights"] - weight_norm = xs * genFiltEff * kfactor * lum / sumOfWeights - - # name of branches in tree - # print(tree.GetListOfBranches()) - - # Event loop - numevents = tree.GetEntriesFast() - for i in range(numevents): - if (i + 1) % 50000 == 0: - print(f" Processed {i + 1:6d}/{totalevents}") - - tree.GetEntry(i) - - weight = weight_norm * weight_mc_array[0] * weight_pileup_array[0] - - # print("--------------------------------------------") - for index in range(len(ph_pt_array)): - if not (ord(ph_select_tightID_array[index]) > 0): - continue - - h_baseline_pt.Fill(ph_pt_array[index] / 1000.0, weight) # Fill GeV - break # only fill with first passing 
photon - - fp.Close() - - -def main(): - # Define samples (all will contribute to one histogram) - samples = [ - { - "name": "Wmunugamma", - "path": Path( - "/data/maclwong/Ben_Bkg_Samples/v2/user.bhodkins.700402.Wmunugamma.mc20e.v2.0_ANALYSIS.root/" - ), - "metadata": { - "genFiltEff": 1.0, - "luminosity": 58.7916, - "xs": 364840.0, - "sum_of_weights": 1816229744476160.0, - "kfactor": 1.0, - }, - } - ] - - # Create a single histogram for all samples combined - h_baseline_pt = r.TH1D( - "baseline_pt_total", - "Photon baseline pT (all samples); pT [GeV]; Events", - 100, - 0, - 1000, - ) - - # Process every file in every sample, filling the same histogram - for s in samples: - print(f"\nProcessing sample: {s['name']}") - for f in sorted(s["path"].glob("*.root")): - photon_eventloop(str(f), h_baseline_pt, s["metadata"]) - - # Save combined histogram - output_file = r.TFile("event_loop_arrays_output_hist.root", "RECREATE") - h_baseline_pt.Write() - output_file.Close() - print("\nCombined histogram written to event_loop_output_hist.root") - - -if __name__ == "__main__": - main() diff --git a/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.sub b/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.sub index 868050d8..8c85da41 100644 --- a/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.sub +++ b/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.sub @@ -4,7 +4,7 @@ Output = /usatlas/u/qlei/batch_output_files/eventloop_arrays/eventloop_arrays.$( Error = /usatlas/u/qlei/batch_output_files/eventloop_arrays/eventloop_arrays.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/eventloop_arrays/eventloop_arrays.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/AF-Benchmarking/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh +Executable = /usatlas/u/qlei/dev/af-benchmarking/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh request_memory = 3GB request_cpus = 1 diff --git 
a/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh b/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh index b2bc6527..16bc34b1 100755 --- a/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh +++ b/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh @@ -1,7 +1,10 @@ #!/bin/bash -# Time that will be used to store the log file -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh + +# current time used for log file storage +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig @@ -9,7 +12,7 @@ export ALRB_localConfigDir="$HOME"/localConfig source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh asetup StatAnalysis,0.6.3 date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log -python3 ~/AF-Benchmarking/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.py 2>&1 | tee eventloop_arrays.log +/usr/bin/time -v python3 ~/AF-Benchmarking/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.py 2>&1 | tee eventloop_arrays.log { date -u "+%Y-%m-%dT%H:%M:%SZ" @@ -17,9 +20,26 @@ python3 ~/AF-Benchmarking/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.p du event_loop_output_hist.root } >> split.log -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_arrays/${curr_time}" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) + +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_arrays/${start_time}" mkdir -p "${output_dir}" +echo "Start Time: ${start_time}" +echo "End Time: ${end_time}" +echo "Wall Time: ${wall_time}" + +# Verify the log exists before appending +if [ -f eventloop_arrays.log ]; then + append_benchmark eventloop_arrays.log "${start_time}" "${wall_time}" "${end_time}" "time_v" +else + echo "ERROR: eventloop_arrays.log not found in $(pwd)" +fi + +# append_benchmark 
eventloop_arrays.log "${start_time}" "${wall_time}" "${end_time}" + mv eventloop_arrays.log "${output_dir}" mv split.log "${output_dir}" diff --git a/NTuple_Hist/event_loop/BNL/standard/cron_eventloop_noarrays.sh b/NTuple_Hist/event_loop/BNL/standard/cron_eventloop_noarrays.sh index 47caccc2..ef47065c 100644 --- a/NTuple_Hist/event_loop/BNL/standard/cron_eventloop_noarrays.sh +++ b/NTuple_Hist/event_loop/BNL/standard/cron_eventloop_noarrays.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_noarrays" readonly log_output="eventloop_noarrays.log" readonly job_dir="/usatlas/u/qlei/test/eventloop_noarrays" -readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" readonly sub_file="${AF_BENCH_DIR}/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.sub" readonly pixi_job="eventloop-standard" diff --git a/NTuple_Hist/event_loop/BNL/standard/event_loop.sub b/NTuple_Hist/event_loop/BNL/standard/event_loop.sub deleted file mode 100644 index 5753218c..00000000 --- a/NTuple_Hist/event_loop/BNL/standard/event_loop.sub +++ /dev/null @@ -1,12 +0,0 @@ -Universe = vanilla - -Output = /usatlas/u/jroblesgo/batch_output_files/eventloop/myjob.$(Cluster).$(Process).out -Error = /usatlas/u/jroblesgo/batch_output_files/eventloop/myjob.$(Cluster).$(Process).err -Log = /usatlas/u/jroblesgo/batch_output_files/eventloop/myjob.$(Cluster).$(Process).log - -Executable = /home/selbor/AF-Benchmarking/NTuple_Hist/event_loop/UC/run_photon_eventloop.sh - -request_memory = 3GB -request_cpus = 1 - -Queue 1 diff --git a/NTuple_Hist/event_loop/BNL/standard/event_loop_noarrays.py b/NTuple_Hist/event_loop/BNL/standard/event_loop_noarrays.py deleted file mode 100644 index bff6e3b4..00000000 --- a/NTuple_Hist/event_loop/BNL/standard/event_loop_noarrays.py +++ /dev/null @@ -1,91 +0,0 @@ -import ROOT as r -from pathlib import Path - - -def photon_eventloop(f_name, h_baseline_pt, metadata): - """Process one ROOT 
file and fill the given histogram.""" - fp = r.TFile.Open(f_name, "READ") - if not fp or fp.IsZombie(): - print(f"Could not open file: {f_name}") - return - - tree = fp.Get("analysis") - if not tree: - print(f"No tree named 'analysis' in {f_name}") - fp.Close() - return - - totalevents = tree.GetEntriesFast() - print(f" {f_name} has {totalevents} events") - - # Normalization factors - xs = metadata["xs"] - lum = metadata["luminosity"] - genFiltEff = metadata["genFiltEff"] - kfactor = metadata.get("kfactor", 1.0) - sumOfWeights = metadata["sum_of_weights"] - weight_norm = xs * genFiltEff * kfactor * lum / sumOfWeights - - # name of branches in tree - # print(tree.GetListOfBranches()) - - # Event loop - for i, event in enumerate(tree): - if (i + 1) % 50000 == 0: - print(f" Processed {i + 1:6d}/{totalevents}") - - weight = weight_norm * event.weight_mc_NOSYS * event.weight_pileup_NOSYS - - for index, ph_pt in enumerate(event.ph_pt_NOSYS): - ph_tight = ord(event.ph_select_tightID_NOSYS[index]) > 0 - if not ph_tight: - continue - - h_baseline_pt.Fill(ph_pt / 1000.0, weight) # Fill GeV - break # only fill with first passing photon - - fp.Close() - - -def main(): - # Define samples (all will contribute to one histogram) - samples = [ - { - "name": "Wmunugamma", - "path": Path( - "/data/maclwong/Ben_Bkg_Samples/v2/user.bhodkins.700402.Wmunugamma.mc20e.v2.0_ANALYSIS.root/" - ), - "metadata": { - "genFiltEff": 1.0, - "luminosity": 58.7916, - "xs": 364840.0, - "sum_of_weights": 1816229744476160.0, - "kfactor": 1.0, - }, - } - ] - - # Create a single histogram for all samples combined - h_baseline_pt = r.TH1D( - "baseline_pt_total", - "Photon baseline pT (all samples); pT [GeV]; Events", - 100, - 0, - 1000, - ) - - # Process every file in every sample, filling the same histogram - for s in samples: - print(f"\nProcessing sample: {s['name']}") - for f in sorted(s["path"].glob("*.root")): - photon_eventloop(str(f), h_baseline_pt, s["metadata"]) - - # Save combined histogram - 
output_file = r.TFile("event_loop_noarrays_output_hist.root", "RECREATE") - h_baseline_pt.Write() - output_file.Close() - print("\nCombined histogram written to event_loop_output_hist.root") - - -if __name__ == "__main__": - main() diff --git a/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.sub b/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.sub index 83cf600f..4554be82 100644 --- a/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.sub +++ b/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.sub @@ -4,7 +4,7 @@ Output = /usatlas/u/qlei/batch_output_files/eventloop/myjob.$(Cluster).$(Process Error = /usatlas/u/qlei/batch_output_files/eventloop/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/eventloop/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/AF-Benchmarking/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh +Executable = /usatlas/u/qlei/dev/af-benchmarking/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh request_memory = 3GB request_cpus = 1 diff --git a/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh b/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh index 7173ab92..80694587 100755 --- a/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh +++ b/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh @@ -1,7 +1,10 @@ #!/bin/bash -# Time that will be used to store the log file -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh + +# current time used for log file storage +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig @@ -10,7 +13,7 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh asetup StatAnalysis,0.6.3 date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log &&\ -python3 
~/AF-Benchmarking/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.py 2>&1 | tee eventloop_noarrays.log +/usr/bin/time -v python3 ~/AF-Benchmarking/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.py 2>&1 | tee eventloop_noarrays.log { date -u "+%Y-%m-%dT%H:%M:%SZ" @@ -18,9 +21,15 @@ python3 ~/AF-Benchmarking/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays du event_loop_noarrays_output_hist.root } >> split.log -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_noarrays/${curr_time}" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) + +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_noarrays/${start_time}" mkdir -p "${output_dir}" +append_benchmark eventloop_noarrays.log "${start_time}" "${wall_time}" "${end_time}" "time_v" + mv eventloop_noarrays.log "${output_dir}" mv split.log "${output_dir}" diff --git a/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh b/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh index 5d3dba57..9753a251 100755 --- a/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh +++ b/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh @@ -1,5 +1,9 @@ #!/bin/bash +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" + +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -13,11 +17,17 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log # Running the script echo "::group::EventLoop Execution" -python3 "${GITHUB_WORKSPACE}"/NTuple_Hist/event_loop/UC/columnar/event_loop_arrays.py 2>&1 | tee eventloop_arrays.log +/usr/bin/time -v python3 "${GITHUB_WORKSPACE}"/NTuple_Hist/event_loop/UC/columnar/event_loop_arrays.py 2>&1 | tee eventloop_arrays.log echo "::endgroup::" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) + # Collect metrics echo 
"::group::Collect Metrics" date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log hostname >> split.log echo "::endgroup::" + +append_benchmark "eventloop_arrays.log" "${start_time}" "${wall_time}" "${end_time}" "time_v" diff --git a/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh b/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh index b1de4803..38fb062d 100755 --- a/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh +++ b/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh @@ -1,5 +1,10 @@ #!/bin/bash +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" + +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) + echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 @@ -7,17 +12,22 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh echo "::endgroup::" lsetup "views LCG_107a_ATLAS_2 x86_64-el9-gcc13-opt" - # Getting start date date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log # Running the script echo "::group::EventLoop Execution" -python3 "${GITHUB_WORKSPACE}"/NTuple_Hist/event_loop/UC/standard/event_loop_noarrays.py 2>&1 | tee eventloop_noarrays.log +/usr/bin/time -v python3 "${GITHUB_WORKSPACE}"/NTuple_Hist/event_loop/UC/standard/event_loop_noarrays.py 2>&1 | tee eventloop_noarrays.log echo "::endgroup::" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) + # Collect metrics echo "::group::Collect Metrics" date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log hostname >> split.log echo "::endgroup::" + +append_benchmark "eventloop_noarrays.log" "${start_time}" "${wall_time}" "${end_time}" "time_v" diff --git a/NTuple_Hist/fastframes/BNL/crontab_fastframes.sh b/NTuple_Hist/fastframes/BNL/crontab_fastframes.sh index a8ca022f..b6599a7b 100644 --- a/NTuple_Hist/fastframes/BNL/crontab_fastframes.sh +++ b/NTuple_Hist/fastframes/BNL/crontab_fastframes.sh @@ -4,7 +4,7 @@ readonly 
log_base="/atlasgpfs01/usatlas/data/qlei/logs/FastFrames_NTuple" readonly log_output="fastframes.log" readonly job_dir="/usatlas/u/qlei/test/fastframes" -readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" readonly sub_file="${AF_BENCH_DIR}/NTuple_Hist/fastframes/BNL/fastframes_el9.sub" readonly pixi_job="fastframes" diff --git a/NTuple_Hist/fastframes/BNL/fastframes_el9.sub b/NTuple_Hist/fastframes/BNL/fastframes_el9.sub index 78b03263..1545050f 100644 --- a/NTuple_Hist/fastframes/BNL/fastframes_el9.sub +++ b/NTuple_Hist/fastframes/BNL/fastframes_el9.sub @@ -1,12 +1,11 @@ # shellcheck shell=bash disable=all Universe = vanilla - Output = /usatlas/u/qlei/batch_output_files/fastframes/myjob.$(Cluster).$(Process).out Error = /usatlas/u/qlei/batch_output_files/fastframes/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/fastframes/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/AF-Benchmarking/NTuple_Hist/fastframes/BNL/run_fastframes.sh +Executable = /usatlas/u/qlei/dev/af-benchmarking/NTuple_Hist/fastframes/BNL/run_fastframes.sh request_memory = 3GB request_cpus = 1 diff --git a/NTuple_Hist/fastframes/BNL/run_fastframes.sh b/NTuple_Hist/fastframes/BNL/run_fastframes.sh index 55d1965e..6fd0ab47 100755 --- a/NTuple_Hist/fastframes/BNL/run_fastframes.sh +++ b/NTuple_Hist/fastframes/BNL/run_fastframes.sh @@ -1,6 +1,11 @@ #!/bin/bash -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh + +# current time used for log file storage + +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) cd /atlasgpfs01/usatlas/data/qlei/ || exit @@ -16,7 +21,7 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -m /atlasgpfs01/usatlas/data/ -c el9 -r "asetup StatAnalysis,0.6.2 &&\ source 
/atlasgpfs01/usatlas/data/qlei/FastFramesTutorial/TutorialClass/build/setup.sh &&\ - python3 /atlasgpfs01/usatlas/data/qlei/FastFramesTutorial/FastFrames/python/FastFrames.py -c /atlasgpfs01/usatlas/data/qlei/input/mc20e_example_config.yml 2>&1 | tee fastframes.log" + /usr/bin/time -v python3 /atlasgpfs01/usatlas/data/qlei/FastFramesTutorial/FastFrames/python/FastFrames.py -c /atlasgpfs01/usatlas/data/qlei/input/mc20e_example_config.yml 2>&1 | tee fastframes.log" # Getting the date and time after running script echo "::group::Collect Metrics" @@ -26,11 +31,17 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log hostname >> split.log echo "::endgroup::" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) + # output directory -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/FastFrames_NTuple/${curr_time}" +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/FastFrames_NTuple/${start_time}" # Creates output dir mkdir -p "${output_dir}" +append_benchmark fastframes.log "${start_time}" "${wall_time}" "${end_time}" "time_v" + # Moves log to outputdir mv fastframes.log "${output_dir}" diff --git a/NTuple_Hist/fastframes/UC/run_fastframes.sh b/NTuple_Hist/fastframes/UC/run_fastframes.sh index bdf96592..304415ca 100755 --- a/NTuple_Hist/fastframes/UC/run_fastframes.sh +++ b/NTuple_Hist/fastframes/UC/run_fastframes.sh @@ -1,7 +1,12 @@ #!/bin/bash +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" + yml_dir="${GITHUB_WORKSPACE}/NTuple_Hist/fastframes/UC/" +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) + # Sets up our working environment echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -19,10 +24,14 @@ source /data/selbor/FastFramesTutorial/TutorialClass/build/setup.sh date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log echo "::group::FastFrames" -python3 /data/selbor/FastFramesTutorial/FastFrames/python/FastFrames.py -c 
"${yml_dir}"mc20e_example_config.yml 2>&1 | tee fastframes.log +/usr/bin/time -v python3 /data/selbor/FastFramesTutorial/FastFrames/python/FastFrames.py -c "${yml_dir}"mc20e_example_config.yml 2>&1 | tee fastframes.log printf "\n" echo "::endgroup::" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) + # Getting the date and time after running script echo "::group::Collect Metrics" date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log @@ -37,3 +46,5 @@ cleanup_dir="/home/selbor/ntuple/fastframes" if [[ -d "${cleanup_dir}" && "${cleanup_dir}" == "/home/selbor/ntuple/fastframes" ]]; then rm -rf "${cleanup_dir:?}/"* fi + +append_benchmark "fastframes.log" "${start_time}" "${wall_time}" "${end_time}" "time_v" diff --git a/Rucio/cron_rucio_bnl.sh b/Rucio/cron_rucio_bnl.sh index 53140a6d..447b67a0 100644 --- a/Rucio/cron_rucio_bnl.sh +++ b/Rucio/cron_rucio_bnl.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/Rucio" readonly log_output="rucio.log" readonly job_dir="/usatlas/u/qlei/test/Rucio" -readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" readonly sub_file="${AF_BENCH_DIR}/Rucio/rucio_el.sub" readonly pixi_job="rucio" diff --git a/Rucio/rucio_el.sub b/Rucio/rucio_el.sub index ef1a98c6..7fa6ad79 100644 --- a/Rucio/rucio_el.sub +++ b/Rucio/rucio_el.sub @@ -4,7 +4,7 @@ Output = /usatlas/u/qlei/batch_output_files/rucio/myjob.$(Cluster).$(Process).ou Error = /usatlas/u/qlei/batch_output_files/rucio/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/rucio/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/AF-Benchmarking/Rucio/rucio_script.sh +Executable = /usatlas/u/qlei/dev/af-benchmarking/Rucio/rucio_script.sh Arguments = bnl request_memory = 16GB diff --git a/Rucio/rucio_script.sh b/Rucio/rucio_script.sh index 630b45ce..9ce16b1b 100755 --- a/Rucio/rucio_script.sh +++ b/Rucio/rucio_script.sh @@ -11,6 
+11,8 @@ container_el9 (){ # - dir_mount (2) # - output_dir (3) # - download_ID (4) + start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + start_epoch=$(date -u +%s) cd "${1}" || exit export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig @@ -25,6 +27,10 @@ container_el9 (){ hostname >> rucio.log &&\ du \"${4#*:}\"/ >> rucio.log &&\ mv rucio.log \"${3}\"" + end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + end_epoch=$(date -u +%s) + wall_time=$((end_epoch - start_epoch)) + append_benchmark "${3}/rucio.log" "${start_time}" "${wall_time}" "${end_time}" "rucio" } native_el9 () { @@ -32,6 +38,8 @@ native_el9 () { # - output_dir # - job_dir # - download_ID + start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + start_epoch=$(date -u +%s) echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig @@ -48,10 +56,14 @@ native_el9 () { echo "::group::Rucio Download" rucio download --rses AGLT2_LOCALGROUPDISK "${3}" 2>&1 | tee rucio.log echo "::endgroup::" + end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + end_epoch=$(date -u +%s) + wall_time=$((end_epoch - start_epoch)) echo "::group::Collect Metrics" hostname >> rucio.log du "${3#*:}" >> rucio.log echo "::endgroup::" + append_benchmark "rucio.log" "${start_time}" "${wall_time}" "${end_time}" "rucio" mv rucio.log "${1}" } @@ -82,6 +94,8 @@ case "$site" in job_dir="/usatlas/u/qlei/test/Rucio/" dir_mount="/atlasgpfs01/usatlas/data/" output_dir="/atlasgpfs01/usatlas/data/qlei/logs/Rucio/${curr_time}/" + AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" + source ${AF_BENCH_DIR}/parsing/utils/benchmark_utils.sh container_el9 "$job_dir" "$dir_mount" "$output_dir" "$download_ID" ;; slac) @@ -92,6 +106,7 @@ case "$site" in ;; uchicago) output_dir="${PWD}" + source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" native_el9 "${PWD}" "${PWD}" "$download_ID" ;; nersc) diff --git 
a/TRUTH3/BNL/CentOS7/cron_centos_batch.sh b/TRUTH3/BNL/CentOS7/cron_centos_batch.sh index ff3d4377..f0bc2281 100644 --- a/TRUTH3/BNL/CentOS7/cron_centos_batch.sh +++ b/TRUTH3/BNL/CentOS7/cron_centos_batch.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_centos7_batch" readonly log_output="log.EVNTtoDAOD" readonly job_dir="/usatlas/u/qlei/test/TRUTH3/centos" -readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" readonly sub_file="${AF_BENCH_DIR}/TRUTH3/BNL/CentOS7/truth3_centos.sub" readonly pixi_job="truth3" diff --git a/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh b/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh index 4588f010..df2ffcc1 100755 --- a/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh +++ b/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh @@ -1,7 +1,11 @@ #!/bin/bash + +source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh + # current time used for log file storage -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) # Copying input files to working directory cp -r ~/AF-Benchmarking/TRUTH3/EVNT.root . 
@@ -16,15 +20,23 @@ export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup AthDerivation,21.2.178.0,here && \ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log &&\ - Reco_tf.py --inputEVNTFile EVNT.root --outputDAODFile=TRUTH3.root --reductionConf TRUTH3 2>&1 | tee pipe_file.log &&\ + /usr/bin/time -v Reco_tf.py --inputEVNTFile EVNT.root --outputDAODFile=TRUTH3.root --reductionConf TRUTH3 2>&1 | tee pipe_file.log &&\ + cat pipe_file.log >> log.EVNTtoDAOD &&\ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log" -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_centos7_batch/${curr_time}" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) + +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_centos7_batch/${start_time}" mkdir -p "${output_dir}" hostname >> split.log du DAOD_TRUTH3.TRUTH3.root >> split.log + +append_benchmark log.EVNTtoDAOD "${start_time}" "${wall_time}" "${end_time}" "time_v" + # Moves the log file to the output directory mv log.EVNTtoDAOD "${output_dir}" mv split.log "${output_dir}" diff --git a/TRUTH3/BNL/CentOS7/truth3_centos.sub b/TRUTH3/BNL/CentOS7/truth3_centos.sub index ef8a4632..622c065e 100644 --- a/TRUTH3/BNL/CentOS7/truth3_centos.sub +++ b/TRUTH3/BNL/CentOS7/truth3_centos.sub @@ -4,7 +4,7 @@ Output = /usatlas/u/qlei/batch_output_files/truth3/centos/myjob.$(Cluster).$(Pro Error = /usatlas/u/qlei/batch_output_files/truth3/centos/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/truth3/centos/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/AF-Benchmarking/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh +Executable = /usatlas/u/qlei/dev/af-benchmarking/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh request_memory = 3G diff --git a/TRUTH3/BNL/EL9/cron_el_batch.sh b/TRUTH3/BNL/EL9/cron_el_batch.sh index 071b8ea4..489379ab 100644 --- 
a/TRUTH3/BNL/EL9/cron_el_batch.sh +++ b/TRUTH3/BNL/EL9/cron_el_batch.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_el9_batch" readonly log_output="log.Derivation" readonly job_dir="/usatlas/u/qlei/test/TRUTH3/el" -readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" readonly sub_file="${AF_BENCH_DIR}/TRUTH3/BNL/EL9/truth3_el.sub" readonly pixi_job="truth3" diff --git a/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh b/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh index 1d1c1fd5..0d5c5e99 100755 --- a/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh +++ b/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh @@ -1,7 +1,10 @@ #!/bin/bash -# Current time used for file storage -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh + +# Current time used for file storage +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) # Copying input files to working directory cp -r ~/AF-Benchmarking/TRUTH3/EVNT.root . 
@@ -16,12 +19,16 @@ export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -r "asetup Athena,24.0.53,here &&\ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log &&\ - Derivation_tf.py --CA True --inputEVNTFile EVNT.root --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log &&\ + /usr/bin/time -v Derivation_tf.py --CA True --inputEVNTFile EVNT.root --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log &&\ + cat pipe_file.log >> log.Derivation &&\ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) + # Defines the output directory -# output_dir="/atlasgpfs01/usatlas/data/jroblesgo/benchmarks/${curr_time}/TRUTH3_el9_batch" -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_el9_batch/${curr_time}" +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_el9_batch/${start_time}" # Creates the output directory mkdir -p "${output_dir}" @@ -30,6 +37,8 @@ mkdir -p "${output_dir}" hostname >> split.log du DAOD_TRUTH3.TRUTH3.root >> split.log +append_benchmark log.Derivation "${start_time}" "${wall_time}" "${end_time}" "time_v" + # Moves the log file to the output directory mv log.Derivation "${output_dir}" mv split.log "${output_dir}" diff --git a/TRUTH3/BNL/EL9/truth3_el.sub b/TRUTH3/BNL/EL9/truth3_el.sub index 8f6c313c..5add79ef 100644 --- a/TRUTH3/BNL/EL9/truth3_el.sub +++ b/TRUTH3/BNL/EL9/truth3_el.sub @@ -1,11 +1,10 @@ Universe = vanilla - Output = /usatlas/u/qlei/batch_output_files/truth3/el/myjob.$(Cluster).$(Process).out Error = /usatlas/u/qlei/batch_output_files/truth3/el/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/truth3/el/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/AF-Benchmarking/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh +Executable = 
/usatlas/u/qlei/dev/af-benchmarking/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh request_memory = 3GB request_cpus = 1 diff --git a/TRUTH3/BNL/Native/cron_native_batch.sh b/TRUTH3/BNL/Native/cron_native_batch.sh index b6476f8a..55183ba5 100644 --- a/TRUTH3/BNL/Native/cron_native_batch.sh +++ b/TRUTH3/BNL/Native/cron_native_batch.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_native_batch" readonly log_output="log.Derivation" readonly job_dir="/usatlas/u/qlei/test/TRUTH3/native" -readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" readonly sub_file="${AF_BENCH_DIR}/TRUTH3/BNL/Native/truth3_native.sub" readonly pixi_job="truth3" diff --git a/TRUTH3/BNL/Native/run_truth3_native_batch.sh b/TRUTH3/BNL/Native/run_truth3_native_batch.sh index d10c617e..dcecc1b5 100755 --- a/TRUTH3/BNL/Native/run_truth3_native_batch.sh +++ b/TRUTH3/BNL/Native/run_truth3_native_batch.sh @@ -1,6 +1,10 @@ #!/bin/bash -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh + +# current time used for log file storage +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) # Copying input files to working directory cp -r ~/AF-Benchmarking/TRUTH3/EVNT.root . 
@@ -16,18 +20,19 @@ asetup Athena,24.0.53,here # Appends time before Derivation_tf.py to log file date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log +/usr/bin/time -v Derivation_tf.py --CA True --inputEVNTFile EVNT.root --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log -Derivation_tf.py --CA True --inputEVNTFile EVNT.root --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log +cat pipe_file.log >> log.Derivation # Appends time after Derivation_tf.py to a log file date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log -# current time used for log file storage - +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) # Defines the output directory -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_native_batch/${curr_time}" - +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_native_batch/${start_time}" # Creates the output directory mkdir -p "${output_dir}" @@ -36,6 +41,8 @@ mkdir -p "${output_dir}" hostname >> split.log du DAOD_TRUTH3.TRUTH3.root >> split.log +append_benchmark log.Derivation "${start_time}" "${wall_time}" "${end_time}" "truth_v" + # Moves the log file to the output directory mv log.Derivation "${output_dir}" mv split.log "${output_dir}" diff --git a/TRUTH3/BNL/Native/truth3_native.sub b/TRUTH3/BNL/Native/truth3_native.sub index 710ad981..beef6c2a 100644 --- a/TRUTH3/BNL/Native/truth3_native.sub +++ b/TRUTH3/BNL/Native/truth3_native.sub @@ -1,11 +1,10 @@ Universe = vanilla - Output = /usatlas/u/qlei/batch_output_files/truth3/native/myjob.$(Cluster).$(Process).out Error = /usatlas/u/qlei/batch_output_files/truth3/native/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/truth3/native/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/AF-Benchmarking/TRUTH3/BNL/Native/run_truth3_native_batch.sh +Executable = /usatlas/u/qlei/dev/af-benchmarking/TRUTH3/BNL/Native/run_truth3_native_batch.sh request_memory = 3G diff --git 
a/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh b/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh index 1e2d4573..437c9cb7 100755 --- a/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh +++ b/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh @@ -1,10 +1,14 @@ #!/bin/bash # shellcheck disable=SC1091 +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" # Defines the directory where the input files are stored config_dir="${GITHUB_WORKSPACE}/TRUTH3/EVNT.root" +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) + # Sets up the ATLAS Environment export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -17,7 +21,12 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup AthDerivation,21.2.178.0,here && \ - Reco_tf.py --inputEVNTFile ${config_dir} --outputDAODFile=TRUTH3.root --reductionConf TRUTH3 2>&1 | tee pipe_file.log" + /usr/bin/time -v Reco_tf.py --inputEVNTFile ${config_dir} --outputDAODFile=TRUTH3.root --reductionConf TRUTH3 2>&1 | tee pipe_file.log && \ + cat pipe_file.log >> log.EVNTtoDAOD" + +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) # Obtains and appends the host machine and payload size to the log file { @@ -26,3 +35,5 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup hostname du DAOD_TRUTH3.TRUTH3.root } >> split.log + +append_benchmark "log.EVNTtoDAOD" "${start_time}" "${wall_time}" "${end_time}" "time_v" diff --git a/TRUTH3/UC/EL9/run_truth3_el9_batch.sh b/TRUTH3/UC/EL9/run_truth3_el9_batch.sh index 9392c304..fc972fce 100755 --- a/TRUTH3/UC/EL9/run_truth3_el9_batch.sh +++ b/TRUTH3/UC/EL9/run_truth3_el9_batch.sh @@ -1,24 +1,32 @@ #!/bin/bash # shellcheck disable=SC1091 +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" # Defines the directory 
where the input files are stored config_dir="${GITHUB_WORKSPACE}/TRUTH3/EVNT.root" +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) + # Sets up the environment export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # Appends time before Derivation_tf.py to log file date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log - # Sets up the container: ## -c : used to make a container followed by the OS we want to use ## -m : mounts a specific directory ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -r "asetup Athena,24.0.53,here && \ - Derivation_tf.py --CA True --inputEVNTFile ${config_dir} --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log" + /usr/bin/time -v Derivation_tf.py --CA True --inputEVNTFile ${config_dir} --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log && \ + cat pipe_file.log >> log.Derivation" + +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) # Obtains and appends the host machine and payload size to the log file { @@ -27,3 +35,5 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -r "asetup Athe hostname du DAOD_TRUTH3.TRUTH3.root } >> split.log + +append_benchmark "log.Derivation" "${start_time}" "${wall_time}" "${end_time}" "time_v" diff --git a/TRUTH3/UC/Native/run_truth3_native_batch.sh b/TRUTH3/UC/Native/run_truth3_native_batch.sh index 4af2e42a..4298872a 100755 --- a/TRUTH3/UC/Native/run_truth3_native_batch.sh +++ b/TRUTH3/UC/Native/run_truth3_native_batch.sh @@ -1,10 +1,14 @@ #!/bin/bash # shellcheck disable=SC1091 +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" # Input files are stored here config_dir="${GITHUB_WORKSPACE}/TRUTH3/EVNT.root" +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_epoch=$(date -u +%s) + # Sets up our environment echo "::group::setupATLAS" export 
ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -19,12 +23,16 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log asetup Athena,24.0.53,here echo "::group::TRUTH3 Derivation" -Derivation_tf.py --CA True --inputEVNTFile "${config_dir}" --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log +/usr/bin/time -v Derivation_tf.py --CA True --inputEVNTFile "${config_dir}" --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log +cat pipe_file.log >> log.Derivation echo "::endgroup::" # Appends time after Derivation_tf.py to a log file date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_epoch=$(date -u +%s) +wall_time=$((end_epoch - start_epoch)) # Obtains and appends the host machine and payload size to the log file echo "::group::Collect Metrics" @@ -35,3 +43,5 @@ echo "::group::Collect Metrics" du DAOD_TRUTH3.TRUTH3.root } >> split.log echo "::endgroup::" + +append_benchmark "log.Derivation" "${start_time}" "${wall_time}" "${end_time}" "time_v" diff --git a/parsing/handlers/base_parser.py b/parsing/handlers/base_parser.py index bdec6f85..99242a13 100644 --- a/parsing/handlers/base_parser.py +++ b/parsing/handlers/base_parser.py @@ -3,26 +3,37 @@ This module provides common parsing logic shared across different benchmark types (TRUTH3, EVNT, etc.). """ +import arrow -import datetime as dt +def parse_benchmark_block(file_lines): + """Extract key=value pairs from the LAST === BENCHMARK === block in a log. 
+ Args: + file_lines: List of lines from the log file -# Month abbreviation to number mapping -MONTH_DICT = { - "Jan": "01", - "Feb": "02", - "Mar": "03", - "Apr": "04", - "May": "05", - "Jun": "06", - "Jul": "07", - "Aug": "08", - "Sep": "09", - "Oct": "10", - "Nov": "11", - "Dec": "12", -} - + Returns: + dict: All key=value pairs from the last benchmark block, + or empty dict if no block found + """ + last_block = {} + current_block = {} + in_block = False + + for line in file_lines: + line = line.strip() + if line == "=== BENCHMARK ===": + in_block = True + current_block = {} + continue + if line == "=================" and in_block: + in_block = False + last_block = current_block # keep overwriting — last one wins + continue + if in_block and "=" in line: + key, _, value = line.partition("=") + current_block[key.strip()] = value.strip() + + return last_block def parse_atlas_log(path, log_name="ATLAS"): """Parse ATLAS benchmark log file for timing information. @@ -37,56 +48,25 @@ def parse_atlas_log(path, log_name="ATLAS"): - queueTime: Queue time in seconds - runTime: Execution time in seconds - status: Exit status (0 = success) + - benchmark: Full dict of all fields from the benchmark block """ print(f"[{log_name}] Parsing {path.name}") - # Read log file with open(path) as f: file_lines = f.readlines() - N = len(file_lines) - - # Parse start datetime from first line - start_datetime_list = file_lines[0].split(" ") - end_time_list = file_lines[N - 1].split(" ") - - start_time = start_datetime_list[0] - month = int(MONTH_DICT[start_datetime_list[2]]) - year = int(start_datetime_list[-1]) - - # Handle different date formats (with/without day of week) - if len(start_datetime_list) == 8: - day = int(start_datetime_list[4]) - submit_time = dt.datetime.strptime(start_datetime_list[5], "%H:%M:%S").time() - else: - day = int(start_datetime_list[3]) - submit_time = dt.datetime.strptime(start_datetime_list[4], "%H:%M:%S").time() - - # Build datetime objects (explicitly 
UTC to avoid local timezone interpretation) - start_date_object = dt.date(year, month, day) - start_time = dt.datetime.strptime(start_datetime_list[0], "%H:%M:%S").time() - start_datetime_object = dt.datetime.combine( - start_date_object, start_time, tzinfo=dt.timezone.utc - ) - utc_timestamp = int(start_datetime_object.timestamp()) * 1000 - # Calculate queue time - submit_datetime_object = dt.datetime.combine( - start_date_object, submit_time, tzinfo=dt.timezone.utc - ) - queue_time = int((start_datetime_object - submit_datetime_object).total_seconds()) + benchmark = parse_benchmark_block(file_lines) - # Calculate run time - end_time = dt.datetime.strptime(end_time_list[0], "%H:%M:%S").time() - end_datetime_object = dt.datetime.combine( - start_date_object, end_time, tzinfo=dt.timezone.utc - ) - run_time = int((end_datetime_object - start_datetime_object).total_seconds()) + if not benchmark: + raise ValueError(f"[{log_name}] No BENCHMARK block found in {path.name}") - status = 0 + start_dt = arrow.get(benchmark["start_time_utc"]) + wall_time = float(benchmark["wall_time_sec"]) return { - "submitTime": utc_timestamp, - "queueTime": queue_time, - "runTime": run_time, - "status": status, + "submitTime": start_dt.int_timestamp * 1000, # milliseconds + "queueTime": 0, + "runTime": wall_time, + "status": int(benchmark.get("exit_status", 0)), + # "benchmark": benchmark, # full block — subparsers can pull extra fields from here } diff --git a/parsing/scripts/ci_parse.py b/parsing/scripts/ci_parse.py index 90f70a65..1b60d526 100755 --- a/parsing/scripts/ci_parse.py +++ b/parsing/scripts/ci_parse.py @@ -13,14 +13,7 @@ from rich.panel import Panel from rich.syntax import Syntax -from parsing.handlers import ( - rucio_parser, - coffea_parser, - fastframes_parser, - truth3_parser, - evnt_parser, - eventloop_parser, -) +from parsing.handlers import base_parser # Initialize rich console console = Console() @@ -92,22 +85,8 @@ def parse_log( f"[bold cyan]Payload file:[/bold cyan] 
{payload_file} ({payload_size} bytes)" ) - # Parse based on log type - if log_type == "rucio": - data = rucio_parser.parse_rucio_log(log_path) - elif log_type == "evnt": - data = evnt_parser.parse_evnt_log(log_path) - elif log_type == "truth3": - data = truth3_parser.parse_truth3_log(log_path) - elif log_type == "coffea": - data = coffea_parser.parse_coffea_log(log_path) - elif log_type == "eventloop": - data = eventloop_parser.parse_eventloop_log(log_path) - elif log_type == "fastframes": - data = fastframes_parser.parse_fastframes_log(log_path) - else: - raise ValueError(f"Unknown log type: {log_type}") - + data = base_parser.parse_atlas_log(log_path) + # Add common fields to all parsed data data["job"] = job data["cluster"] = cluster From 35b246d43c95bfe303b98644b47d19dac038178c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 3 May 2026 22:08:03 +0000 Subject: [PATCH 02/18] style: pre-commit fixes --- EVNT/UC/CentOS7/run_evnt_centos7_batch.sh | 6 +++--- parsing/handlers/base_parser.py | 9 ++++++--- parsing/scripts/ci_parse.py | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh b/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh index 3fa8a9f9..cbfdc8fa 100755 --- a/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh +++ b/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh @@ -27,9 +27,9 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 -source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup AthGeneration,23.6.31,here &&\ - export LHAPATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current &&\ - export 
LHAPDF_DATA_PATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current &&\ +source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup AthGeneration,23.6.31,here &&\ + export LHAPATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current &&\ + export LHAPDF_DATA_PATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current &&\ /usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=${config_dir} --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log &&\ cat pipe_file.log >> log.generate" diff --git a/parsing/handlers/base_parser.py b/parsing/handlers/base_parser.py index 99242a13..c9dd2136 100644 --- a/parsing/handlers/base_parser.py +++ b/parsing/handlers/base_parser.py @@ -3,8 +3,10 @@ This module provides common parsing logic shared across different benchmark types (TRUTH3, EVNT, etc.). """ + import arrow + def parse_benchmark_block(file_lines): """Extract key=value pairs from the LAST === BENCHMARK === block in a log. @@ -35,6 +37,7 @@ def parse_benchmark_block(file_lines): return last_block + def parse_atlas_log(path, log_name="ATLAS"): """Parse ATLAS benchmark log file for timing information. 
@@ -65,8 +68,8 @@ def parse_atlas_log(path, log_name="ATLAS"): return { "submitTime": start_dt.int_timestamp * 1000, # milliseconds - "queueTime": 0, - "runTime": wall_time, - "status": int(benchmark.get("exit_status", 0)), + "queueTime": 0, + "runTime": wall_time, + "status": int(benchmark.get("exit_status", 0)), # "benchmark": benchmark, # full block — subparsers can pull extra fields from here } diff --git a/parsing/scripts/ci_parse.py b/parsing/scripts/ci_parse.py index 1b60d526..29422a5b 100755 --- a/parsing/scripts/ci_parse.py +++ b/parsing/scripts/ci_parse.py @@ -86,7 +86,7 @@ def parse_log( ) data = base_parser.parse_atlas_log(log_path) - + # Add common fields to all parsed data data["job"] = job data["cluster"] = cluster From be4554dc75a667d430bc315faea66f5d0c9fc982 Mon Sep 17 00:00:00 2001 From: Giordon Stark Date: Mon, 4 May 2026 16:04:01 -0700 Subject: [PATCH 03/18] bump From 6ab90f047323f0f78633a43f3ab547f320d7094f Mon Sep 17 00:00:00 2001 From: Qi Bin Lei Date: Fri, 8 May 2026 10:22:16 -0700 Subject: [PATCH 04/18] fix: compute runTime from timestamps and add benchmark_utils.sh - Replace wall_time_sec with end_time_utc - start_time_utc for runTime - Use queue_time variable instead of hardcoded 0 for queueTime - Add benchmark_utils.sh shared shell utilities for benchmark scripts Co-Authored-By: Claude Sonnet 4.6 --- parsing/handlers/base_parser.py | 9 ++-- parsing/utils/benchmark_utils.sh | 84 ++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 4 deletions(-) create mode 100644 parsing/utils/benchmark_utils.sh diff --git a/parsing/handlers/base_parser.py b/parsing/handlers/base_parser.py index c9dd2136..adc15a12 100644 --- a/parsing/handlers/base_parser.py +++ b/parsing/handlers/base_parser.py @@ -51,7 +51,6 @@ def parse_atlas_log(path, log_name="ATLAS"): - queueTime: Queue time in seconds - runTime: Execution time in seconds - status: Exit status (0 = success) - - benchmark: Full dict of all fields from the benchmark block """ 
print(f"[{log_name}] Parsing {path.name}") @@ -64,12 +63,14 @@ def parse_atlas_log(path, log_name="ATLAS"): raise ValueError(f"[{log_name}] No BENCHMARK block found in {path.name}") start_dt = arrow.get(benchmark["start_time_utc"]) - wall_time = float(benchmark["wall_time_sec"]) + end_dt = arrow.get(benchmark["end_time_utc"]) + queue_time = 0 + run_time = int((end_dt - start_dt).total_seconds()) return { "submitTime": start_dt.int_timestamp * 1000, # milliseconds - "queueTime": 0, - "runTime": wall_time, + "queueTime": queue_time, + "runTime": run_time, "status": int(benchmark.get("exit_status", 0)), # "benchmark": benchmark, # full block — subparsers can pull extra fields from here } diff --git a/parsing/utils/benchmark_utils.sh b/parsing/utils/benchmark_utils.sh new file mode 100644 index 00000000..b141ba54 --- /dev/null +++ b/parsing/utils/benchmark_utils.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# benchmark_utils.sh +# Shared utilities for ATLAS benchmark scripts + +# Parse /usr/bin/time -v output from a log file and extract key metrics +# Usage: extract_time_metrics +extract_time_metrics() { + local log_file=$1 + + grep -E "User time|System time|Percent of CPU|Elapsed \(wall clock\)|Maximum resident set size|Major \(requiring I/O\)|Minor \(reclaiming|Voluntary context|Involuntary context|Exit status" "${log_file}" | \ + awk -F': ' ' + /User time/ { user=$2 } + /System time/ { sys=$2 } + /Percent of CPU/ { cpu=$2; gsub(/%/,"",cpu) } + /Elapsed \(wall clock\)/ { elapsed=$2 } + /Maximum resident set/ { maxrss=$2 } + /Major.*page faults/ { majflt=$2 } + /Minor.*page faults/ { minflt=$2 } + /Voluntary context/ { vcswitch=$2 } + /Involuntary context/ { ivcswitch=$2 } + /Exit status/ { exit_status=$2 } + END { + print "user_time_sec=" user + print "sys_time_sec=" sys + print "cpu_percent=" cpu + print "elapsed_time=" elapsed + print "max_rss_kb=" maxrss + print "major_page_faults=" majflt + print "minor_page_faults=" minflt + print "voluntary_ctx_switches=" vcswitch + print 
"involuntary_ctx_switches=" ivcswitch + print "exit_status=" exit_status + } + ' +} + +# Parse rucio download metrics from a rucio.log file +# Usage: extract_rucio_metrics +extract_rucio_metrics() { + local log_file=$1 + + awk ' + /Total files \(DID\)/ { total_did=$NF } + /Total files \(filtered\)/ { total_filtered=$NF } + /Downloaded files/ { downloaded=$NF } + /Files already found locally/ { already_local=$NF } + /Files that cannot be downloaded/ { failed=$NF } + /^[0-9]+[[:space:]]/ { du_kb=$1 } + END { + print "rucio_total_files_did=" total_did + print "rucio_total_files_filtered=" total_filtered + print "rucio_downloaded_files=" downloaded + print "rucio_already_local=" already_local + print "rucio_failed_files=" failed + print "rucio_du_kb=" du_kb + } + ' "${log_file}" +} + +# Append standardized benchmark block to a log file +# Usage: append_benchmark +append_benchmark() { + local log_file=$1 + local start_time=$2 + local wall_time_sec=$3 + local end_time=$4 + local mode=${5:-time_v} + + local extra_metrics="" + case "${mode}" in + time_v) extra_metrics=$(extract_time_metrics "${log_file}") ;; + rucio) extra_metrics=$(extract_rucio_metrics "${log_file}") ;; + none) ;; + esac + + cat >> "${log_file}" < Date: Fri, 8 May 2026 10:30:57 -0700 Subject: [PATCH 05/18] refactor: remove wall_time from benchmark scripts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit append_benchmark now takes (log_file, start_time, end_time, mode) — wall_time_sec is no longer written to the BENCHMARK block since parsers derive run time from end_time_utc - start_time_utc directly. Removes start_epoch, end_epoch, and wall_time variables from all 21 run scripts and drops the wall_time_sec field from benchmark_utils.sh. 
Co-Authored-By: Claude Sonnet 4.6 --- EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh | 5 +---- EVNT/BNL/EL9/run_evnt_el9_batch.sh | 5 +---- EVNT/BNL/Native/run_evnt_native_batch.sh | 5 +---- EVNT/UC/CentOS7/run_evnt_centos7_batch.sh | 5 +---- EVNT/UC/EL9/run_evnt_el9_batch.sh | 5 +---- EVNT/UC/Native/run_evnt_native_batch.sh | 5 +---- NTuple_Hist/coffea/BNL/run_example.sh | 5 +---- NTuple_Hist/coffea/UC/run_example.sh | 5 +---- .../event_loop/BNL/columnar/run_eventloop_arrays.sh | 8 ++------ .../event_loop/BNL/standard/run_eventloop_noarrays.sh | 5 +---- .../event_loop/UC/columnar/run_eventloop_arrays.sh | 5 +---- .../event_loop/UC/standard/run_eventloop_noarrays.sh | 5 +---- NTuple_Hist/fastframes/BNL/run_fastframes.sh | 5 +---- NTuple_Hist/fastframes/UC/run_fastframes.sh | 5 +---- Rucio/rucio_script.sh | 10 ++-------- TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh | 5 +---- TRUTH3/BNL/EL9/run_truth3_el9_batch.sh | 5 +---- TRUTH3/BNL/Native/run_truth3_native_batch.sh | 5 +---- TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh | 5 +---- TRUTH3/UC/EL9/run_truth3_el9_batch.sh | 5 +---- TRUTH3/UC/Native/run_truth3_native_batch.sh | 5 +---- parsing/utils/benchmark_utils.sh | 8 +++----- 22 files changed, 26 insertions(+), 95 deletions(-) diff --git a/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh b/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh index f3409969..b7341d98 100755 --- a/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh +++ b/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh @@ -4,7 +4,6 @@ source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh # Current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) # Sets up the container: ## -c : used to make a container followed by the OS we want to use @@ -19,8 +18,6 @@ source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh -c centos7 -m /atlasgpfs echo $(date -u "+%Y-%m-%dT%H:%M:%SZ") >> split.log" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) 
-wall_time=$((end_epoch - start_epoch)) # Output directory output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_centos7_batch/${start_time}" @@ -31,7 +28,7 @@ mkdir -p "${output_dir}" hostname >> split.log du EVNT.root >> split.log -append_benchmark log.generate "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark log.generate "${start_time}" "${end_time}" "time_v" # Moves the log file to the output directory mv log.generate "${output_dir}" diff --git a/EVNT/BNL/EL9/run_evnt_el9_batch.sh b/EVNT/BNL/EL9/run_evnt_el9_batch.sh index 55eebd73..a3ed04b2 100755 --- a/EVNT/BNL/EL9/run_evnt_el9_batch.sh +++ b/EVNT/BNL/EL9/run_evnt_el9_batch.sh @@ -4,7 +4,6 @@ source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) # The OS used in the container OScontainer="el9" @@ -22,8 +21,6 @@ source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh -c ${OScontainer} -m /at echo $(date -u "+%Y-%m-%dT%H:%M:%SZ") >> split.log" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) # Output directory output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_el9_batch/${start_time}" @@ -34,7 +31,7 @@ mkdir -p "${output_dir}" hostname >> split.log du EVNT.root >> split.log -append_benchmark log.generate "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark log.generate "${start_time}" "${end_time}" "time_v" # Moves the log file to the output directory mv log.generate "${output_dir}" diff --git a/EVNT/BNL/Native/run_evnt_native_batch.sh b/EVNT/BNL/Native/run_evnt_native_batch.sh index 58349a1b..a8d185be 100755 --- a/EVNT/BNL/Native/run_evnt_native_batch.sh +++ b/EVNT/BNL/Native/run_evnt_native_batch.sh @@ -4,7 +4,6 @@ source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") 
-start_epoch=$(date -u +%s) # The seed used in the job seed=1001 @@ -27,8 +26,6 @@ cat pipe_file.log >> log.generate date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) # Output directory output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_native_batch/${start_time}" @@ -39,7 +36,7 @@ mkdir -p "${output_dir}" hostname >> split.log du EVNT.root >> split.log -append_benchmark log.generate "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark log.generate "${start_time}" "${end_time}" "time_v" # Moves the log file to the output directory mv log.generate "${output_dir}" diff --git a/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh b/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh index cbfdc8fa..e69628ab 100755 --- a/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh +++ b/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh @@ -10,7 +10,6 @@ OS_container="centos7" seed=1001 start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) # Directory storing the input files config_dir="${GITHUB_WORKSPACE}/EVNT/EVNTFiles/100xxx/100001" @@ -34,8 +33,6 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup cat pipe_file.log >> log.generate" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) # Appends time after Gen_tf.py to a log file { @@ -44,4 +41,4 @@ wall_time=$((end_epoch - start_epoch)) du EVNT.root } >> split.log -append_benchmark "log.generate" "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark "log.generate" "${start_time}" "${end_time}" "time_v" diff --git a/EVNT/UC/EL9/run_evnt_el9_batch.sh b/EVNT/UC/EL9/run_evnt_el9_batch.sh index 8a758342..2067cf95 100755 --- a/EVNT/UC/EL9/run_evnt_el9_batch.sh +++ b/EVNT/UC/EL9/run_evnt_el9_batch.sh @@ -9,7 +9,6 @@ OS_container="el9" seed=1001 start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) # Directory 
storing the input files config_dir="${GITHUB_WORKSPACE}/EVNT/EVNTFiles/100xxx/100001" @@ -30,8 +29,6 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c "${OS_container}" - cat pipe_file.log >> log.generate" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) # Appends time after Gen_tf.py to a log file { @@ -40,4 +37,4 @@ wall_time=$((end_epoch - start_epoch)) du EVNT.root } >> split.log -append_benchmark "log.generate" "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark "log.generate" "${start_time}" "${end_time}" "time_v" diff --git a/EVNT/UC/Native/run_evnt_native_batch.sh b/EVNT/UC/Native/run_evnt_native_batch.sh index 23cc2275..8355aae3 100755 --- a/EVNT/UC/Native/run_evnt_native_batch.sh +++ b/EVNT/UC/Native/run_evnt_native_batch.sh @@ -6,7 +6,6 @@ source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" seed=1001 start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) # Directory storing the input files config_dir="${GITHUB_WORKSPACE}/EVNT/EVNTFiles/100xxx/100001" @@ -32,8 +31,6 @@ cat pipe_file.log >> log.generate echo "::endgroup::" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) # Appends time after Gen_tf.py to a log file echo "::group::Collect Metrics" @@ -44,4 +41,4 @@ echo "::group::Collect Metrics" } >> split.log echo "::endgroup::" -append_benchmark "log.generate" "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark "log.generate" "${start_time}" "${end_time}" "time_v" diff --git a/NTuple_Hist/coffea/BNL/run_example.sh b/NTuple_Hist/coffea/BNL/run_example.sh index d98d9e85..c8230897 100755 --- a/NTuple_Hist/coffea/BNL/run_example.sh +++ b/NTuple_Hist/coffea/BNL/run_example.sh @@ -5,7 +5,6 @@ source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") 
-start_epoch=$(date -u +%s) working_dir="/atlasgpfs01/usatlas/data/qlei/ntuple/coffea" @@ -29,8 +28,6 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -m /atlasgpfs01 /usr/bin/time -v ./venv/bin/python example.py 2>&1 | tee coffea_hist.log" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) { date -u "+%Y-%m-%dT%H:%M:%SZ" @@ -42,7 +39,7 @@ output_dir="/atlasgpfs01/usatlas/data/qlei/logs/Coffea_Hist/${start_time}" mkdir -p "${output_dir}" -append_benchmark coffea_hist.log "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark coffea_hist.log "${start_time}" "${end_time}" "time_v" mv coffea_hist.log "${output_dir}" mv split.log "${output_dir}" diff --git a/NTuple_Hist/coffea/UC/run_example.sh b/NTuple_Hist/coffea/UC/run_example.sh index c339c008..64f52f81 100755 --- a/NTuple_Hist/coffea/UC/run_example.sh +++ b/NTuple_Hist/coffea/UC/run_example.sh @@ -5,7 +5,6 @@ source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) #cp ${GITHUB_WORKSPACE}/NTuple_Hist/coffea/UC/example.py . 
@@ -20,8 +19,6 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -m /data -r "ls /usr/bin/time -v ./venv/bin/python ${GITHUB_WORKSPACE}/NTuple_Hist/coffea/UC/example.py 2>&1 | tee coffea_hist.log" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) echo "::group::Collect Metrics" { @@ -30,4 +27,4 @@ echo "::group::Collect Metrics" } >> split.log echo "::endgroup::" -append_benchmark "coffea_hist.log" "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark "coffea_hist.log" "${start_time}" "${end_time}" "time_v" diff --git a/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh b/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh index 16bc34b1..9f5cc11b 100755 --- a/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh +++ b/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh @@ -4,7 +4,6 @@ source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig @@ -21,8 +20,6 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log } >> split.log end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) output_dir="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_arrays/${start_time}" @@ -30,16 +27,15 @@ mkdir -p "${output_dir}" echo "Start Time: ${start_time}" echo "End Time: ${end_time}" -echo "Wall Time: ${wall_time}" # Verify the log exists before appending if [ -f eventloop_arrays.log ]; then - append_benchmark eventloop_arrays.log "${start_time}" "${wall_time}" "${end_time}" "time_v" + append_benchmark eventloop_arrays.log "${start_time}" "${end_time}" "time_v" else echo "ERROR: eventloop_arrays.log not found in $(pwd)" fi -# append_benchmark eventloop_arrays.log 
"${start_time}" "${wall_time}" "${end_time}" +# append_benchmark eventloop_arrays.log "${start_time}" "${end_time}" mv eventloop_arrays.log "${output_dir}" mv split.log "${output_dir}" diff --git a/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh b/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh index 80694587..97484b13 100755 --- a/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh +++ b/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh @@ -4,7 +4,6 @@ source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig @@ -22,14 +21,12 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log &&\ } >> split.log end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) output_dir="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_noarrays/${start_time}" mkdir -p "${output_dir}" -append_benchmark eventloop_noarrays.log "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark eventloop_noarrays.log "${start_time}" "${end_time}" "time_v" mv eventloop_noarrays.log "${output_dir}" mv split.log "${output_dir}" diff --git a/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh b/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh index 9753a251..6c04e145 100755 --- a/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh +++ b/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh @@ -3,7 +3,6 @@ source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -21,8 +20,6 @@ echo "::group::EventLoop Execution" echo "::endgroup::" end_time=$(date -u 
"+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) # Collect metrics echo "::group::Collect Metrics" @@ -30,4 +27,4 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log hostname >> split.log echo "::endgroup::" -append_benchmark "eventloop_arrays.log" "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark "eventloop_arrays.log" "${start_time}" "${end_time}" "time_v" diff --git a/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh b/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh index 38fb062d..18696655 100755 --- a/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh +++ b/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh @@ -3,7 +3,6 @@ source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -21,8 +20,6 @@ echo "::group::EventLoop Execution" echo "::endgroup::" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) # Collect metrics echo "::group::Collect Metrics" @@ -30,4 +27,4 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log hostname >> split.log echo "::endgroup::" -append_benchmark "eventloop_noarrays.log" "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark "eventloop_noarrays.log" "${start_time}" "${end_time}" "time_v" diff --git a/NTuple_Hist/fastframes/BNL/run_fastframes.sh b/NTuple_Hist/fastframes/BNL/run_fastframes.sh index 6fd0ab47..3b9f06b2 100755 --- a/NTuple_Hist/fastframes/BNL/run_fastframes.sh +++ b/NTuple_Hist/fastframes/BNL/run_fastframes.sh @@ -5,7 +5,6 @@ source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) cd /atlasgpfs01/usatlas/data/qlei/ || exit @@ -32,8 +31,6 @@ hostname 
>> split.log echo "::endgroup::" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) # output directory output_dir="/atlasgpfs01/usatlas/data/qlei/logs/FastFrames_NTuple/${start_time}" @@ -41,7 +38,7 @@ output_dir="/atlasgpfs01/usatlas/data/qlei/logs/FastFrames_NTuple/${start_time}" # Creates output dir mkdir -p "${output_dir}" -append_benchmark fastframes.log "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark fastframes.log "${start_time}" "${end_time}" "time_v" # Moves log to outputdir mv fastframes.log "${output_dir}" diff --git a/NTuple_Hist/fastframes/UC/run_fastframes.sh b/NTuple_Hist/fastframes/UC/run_fastframes.sh index 304415ca..9934d74d 100755 --- a/NTuple_Hist/fastframes/UC/run_fastframes.sh +++ b/NTuple_Hist/fastframes/UC/run_fastframes.sh @@ -5,7 +5,6 @@ source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" yml_dir="${GITHUB_WORKSPACE}/NTuple_Hist/fastframes/UC/" start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) # Sets up our working environment echo "::group::setupATLAS" @@ -29,8 +28,6 @@ printf "\n" echo "::endgroup::" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) # Getting the date and time after running script echo "::group::Collect Metrics" @@ -47,4 +44,4 @@ if [[ -d "${cleanup_dir}" && "${cleanup_dir}" == "/home/selbor/ntuple/fastframes rm -rf "${cleanup_dir:?}/"* fi -append_benchmark "fastframes.log" "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark "fastframes.log" "${start_time}" "${end_time}" "time_v" diff --git a/Rucio/rucio_script.sh b/Rucio/rucio_script.sh index 9ce16b1b..4c82e4d4 100755 --- a/Rucio/rucio_script.sh +++ b/Rucio/rucio_script.sh @@ -12,7 +12,6 @@ container_el9 (){ # - output_dir (3) # - download_ID (4) start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") - start_epoch=$(date -u +%s) cd "${1}" || exit export 
ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig @@ -28,9 +27,7 @@ container_el9 (){ du \"${4#*:}\"/ >> rucio.log &&\ mv rucio.log \"${3}\"" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") - end_epoch=$(date -u +%s) - wall_time=$((end_epoch - start_epoch)) - append_benchmark "${3}/rucio.log" "${start_time}" "${wall_time}" "${end_time}" "rucio" + append_benchmark "${3}/rucio.log" "${start_time}" "${end_time}" "rucio" } native_el9 () { @@ -39,7 +36,6 @@ native_el9 () { # - job_dir # - download_ID start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") - start_epoch=$(date -u +%s) echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig @@ -57,13 +53,11 @@ native_el9 () { rucio download --rses AGLT2_LOCALGROUPDISK "${3}" 2>&1 | tee rucio.log echo "::endgroup::" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") - end_epoch=$(date -u +%s) - wall_time=$((end_epoch - start_epoch)) echo "::group::Collect Metrics" hostname >> rucio.log du "${3#*:}" >> rucio.log echo "::endgroup::" - append_benchmark "rucio.log" "${start_time}" "${wall_time}" "${end_time}" "rucio" + append_benchmark "rucio.log" "${start_time}" "${end_time}" "rucio" mv rucio.log "${1}" } diff --git a/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh b/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh index df2ffcc1..33aa2a59 100755 --- a/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh +++ b/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh @@ -5,7 +5,6 @@ source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) # Copying input files to working directory cp -r ~/AF-Benchmarking/TRUTH3/EVNT.root . 
@@ -25,8 +24,6 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_centos7_batch/${start_time}" @@ -35,7 +32,7 @@ mkdir -p "${output_dir}" hostname >> split.log du DAOD_TRUTH3.TRUTH3.root >> split.log -append_benchmark log.EVNTtoDAOD "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark log.EVNTtoDAOD "${start_time}" "${end_time}" "time_v" # Moves the log file to the output directory mv log.EVNTtoDAOD "${output_dir}" diff --git a/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh b/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh index 0d5c5e99..d3394a65 100755 --- a/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh +++ b/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh @@ -4,7 +4,6 @@ source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh # Current time used for file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) # Copying input files to working directory cp -r ~/AF-Benchmarking/TRUTH3/EVNT.root . 
@@ -24,8 +23,6 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -r "asetup Athe date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) # Defines the output directory output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_el9_batch/${start_time}" @@ -37,7 +34,7 @@ mkdir -p "${output_dir}" hostname >> split.log du DAOD_TRUTH3.TRUTH3.root >> split.log -append_benchmark log.Derivation "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark log.Derivation "${start_time}" "${end_time}" "time_v" # Moves the log file to the output directory mv log.Derivation "${output_dir}" diff --git a/TRUTH3/BNL/Native/run_truth3_native_batch.sh b/TRUTH3/BNL/Native/run_truth3_native_batch.sh index dcecc1b5..0292451b 100755 --- a/TRUTH3/BNL/Native/run_truth3_native_batch.sh +++ b/TRUTH3/BNL/Native/run_truth3_native_batch.sh @@ -4,7 +4,6 @@ source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) # Copying input files to working directory cp -r ~/AF-Benchmarking/TRUTH3/EVNT.root . 
@@ -28,8 +27,6 @@ cat pipe_file.log >> log.Derivation date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) # Defines the output directory output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_native_batch/${start_time}" @@ -41,7 +38,7 @@ mkdir -p "${output_dir}" hostname >> split.log du DAOD_TRUTH3.TRUTH3.root >> split.log -append_benchmark log.Derivation "${start_time}" "${wall_time}" "${end_time}" "truth_v" +append_benchmark log.Derivation "${start_time}" "${end_time}" "truth_v" # Moves the log file to the output directory mv log.Derivation "${output_dir}" diff --git a/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh b/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh index 437c9cb7..073757b4 100755 --- a/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh +++ b/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh @@ -7,7 +7,6 @@ source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" config_dir="${GITHUB_WORKSPACE}/TRUTH3/EVNT.root" start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) # Sets up the ATLAS Environment export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -25,8 +24,6 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup cat pipe_file.log >> log.EVNTtoDAOD" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) # Obtains and appends the host machine and payload size to the log file { @@ -36,4 +33,4 @@ wall_time=$((end_epoch - start_epoch)) du DAOD_TRUTH3.TRUTH3.root } >> split.log -append_benchmark "log.EVNTtoDAOD" "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark "log.EVNTtoDAOD" "${start_time}" "${end_time}" "time_v" diff --git a/TRUTH3/UC/EL9/run_truth3_el9_batch.sh b/TRUTH3/UC/EL9/run_truth3_el9_batch.sh index fc972fce..80b1a856 100755 --- a/TRUTH3/UC/EL9/run_truth3_el9_batch.sh +++ 
b/TRUTH3/UC/EL9/run_truth3_el9_batch.sh @@ -7,7 +7,6 @@ source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" config_dir="${GITHUB_WORKSPACE}/TRUTH3/EVNT.root" start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) # Sets up the environment export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -25,8 +24,6 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -r "asetup Athe cat pipe_file.log >> log.Derivation" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) # Obtains and appends the host machine and payload size to the log file { @@ -36,4 +33,4 @@ wall_time=$((end_epoch - start_epoch)) du DAOD_TRUTH3.TRUTH3.root } >> split.log -append_benchmark "log.Derivation" "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark "log.Derivation" "${start_time}" "${end_time}" "time_v" diff --git a/TRUTH3/UC/Native/run_truth3_native_batch.sh b/TRUTH3/UC/Native/run_truth3_native_batch.sh index 4298872a..6510392c 100755 --- a/TRUTH3/UC/Native/run_truth3_native_batch.sh +++ b/TRUTH3/UC/Native/run_truth3_native_batch.sh @@ -7,7 +7,6 @@ source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" config_dir="${GITHUB_WORKSPACE}/TRUTH3/EVNT.root" start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -start_epoch=$(date -u +%s) # Sets up our environment echo "::group::setupATLAS" @@ -31,8 +30,6 @@ echo "::endgroup::" date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") -end_epoch=$(date -u +%s) -wall_time=$((end_epoch - start_epoch)) # Obtains and appends the host machine and payload size to the log file echo "::group::Collect Metrics" @@ -44,4 +41,4 @@ echo "::group::Collect Metrics" } >> split.log echo "::endgroup::" -append_benchmark "log.Derivation" "${start_time}" "${wall_time}" "${end_time}" "time_v" +append_benchmark "log.Derivation" "${start_time}" "${end_time}" "time_v" diff --git a/parsing/utils/benchmark_utils.sh 
b/parsing/utils/benchmark_utils.sh index b141ba54..eb827cb4 100644 --- a/parsing/utils/benchmark_utils.sh +++ b/parsing/utils/benchmark_utils.sh @@ -58,13 +58,12 @@ extract_rucio_metrics() { } # Append standardized benchmark block to a log file -# Usage: append_benchmark +# Usage: append_benchmark [mode] append_benchmark() { local log_file=$1 local start_time=$2 - local wall_time_sec=$3 - local end_time=$4 - local mode=${5:-time_v} + local end_time=$3 + local mode=${4:-time_v} local extra_metrics="" case "${mode}" in @@ -76,7 +75,6 @@ append_benchmark() { cat >> "${log_file}" < Date: Fri, 8 May 2026 23:22:56 -0700 Subject: [PATCH 06/18] refactor: flatten parsing module, remove retired parsers All job types now use the BENCHMARK block approach via base_parser.py. Remove handlers/ subdirectory and all per-type parsers (evnt, truth3, rucio, coffea, eventloop, fastframes), ParsingClass from base/, and text_utils.py which had no remaining callers. Move base_parser.py to parsing/ root and update ci_parse.py import. 
Co-Authored-By: Claude Sonnet 4.6 --- parsing/base/parsing_base.py | 100 ------------------------- parsing/{handlers => }/base_parser.py | 0 parsing/handlers/__init__.py | 1 - parsing/handlers/coffea_parser.py | 66 ----------------- parsing/handlers/eventloop_parser.py | 37 ---------- parsing/handlers/evnt_parser.py | 23 ------ parsing/handlers/fastframes_parser.py | 56 -------------- parsing/handlers/rucio_parser.py | 65 ---------------- parsing/handlers/truth3_parser.py | 23 ------ parsing/scripts/ci_parse.py | 2 +- parsing/tests/test_parsers.py | 102 +------------------------- parsing/utils/text_utils.py | 30 -------- 12 files changed, 2 insertions(+), 503 deletions(-) delete mode 100644 parsing/base/parsing_base.py rename parsing/{handlers => }/base_parser.py (100%) delete mode 100644 parsing/handlers/__init__.py delete mode 100644 parsing/handlers/coffea_parser.py delete mode 100644 parsing/handlers/eventloop_parser.py delete mode 100644 parsing/handlers/evnt_parser.py delete mode 100644 parsing/handlers/fastframes_parser.py delete mode 100644 parsing/handlers/rucio_parser.py delete mode 100644 parsing/handlers/truth3_parser.py delete mode 100644 parsing/utils/text_utils.py diff --git a/parsing/base/parsing_base.py b/parsing/base/parsing_base.py deleted file mode 100644 index a0c9b012..00000000 --- a/parsing/base/parsing_base.py +++ /dev/null @@ -1,100 +0,0 @@ -import json -from pathlib import Path -from collections.abc import Callable -import hashlib -from datetime import datetime, timedelta - - -class ParsingClass: - def __init__(self, log_dir: str, state_file: str = "state/parsed_state.json"): - self.log_dir = Path(log_dir) - self.state_file = Path(state_file) - - # Initializes state - self.state = self._load_state() - - # Register: pattern -> parsing function - self.parsers = {} - - def _load_state(self) -> dict: - if self.state_file.exists(): - with open(self.state) as f: - return json.load(f) - return {"parsed_files": {}} - - def _save_state(self): - 
self.state_file.parent.mkdir(parents=True, exist_ok=True) - with open(self.state_file, "w") as f: - json.dump(self.state, f, indent=2) - - def _file_hash(self, file_path: Path) -> str: - """Creates file hash to detect any changes""" - hasher = hashlib.md5() - with open(file_path, "rb") as f: - hasher.update(f.read()) - return hasher.hexdigest() - - def _marked_as_parsed(self, file_path: Path, file_hash: str): - self.state["parsed_files"][str(file_path)] = file_hash - self._save_state() - - def _has_been_parsed(self, file_path: Path, file_hash: str) -> bool: - return ( - str(file_path) in self.state["parsed_files"] - and self.state["parsed_files"][str(file_path)] == file_hash - ) - - def register_parsers(self, patterns: str, func: Callable): - """Registers functions for log types""" - self.parsers[patterns] = func - - def _recent_dirs(self, days=7): - cutoff = datetime.now() - timedelta(days=days) - recent = [] - - for d in self.log_dir.iterdir(): - if d.is_dir(): - try: - dt = datetime.strptime(d.name, "%Y.%m.%dT%H") - if dt >= cutoff: - recent.append(d) - except ValueError: - continue - return recent - - def discover_logs(self): - logs = [] - for d in self._recent_dirs(days=7): - for pattern in self.parsers: - logs.extend(d.rglob(pattern)) - return logs - - def parse_all(self): - logs = self.discover_logs() - print(f"Found {len(logs)} logs") - - for log_file in logs: - file_hash = self._file_hash(log_file) - - if self._has_been_parsed(log_file, file_hash): - print(f"[SKIP] Already Parsed: {log_file}") - continue - - print(f"[PARSE] Parsing: {log_file}") - self._parse_file(log_file) - - # Marks files as successful parsing - self._mark_as_parsed(log_file, file_hash) - - def _mark_as_parsed(self, path, file_hash): - """Records that a file has been successfully parsed.""" - self.state["parsed_files"][str(path)] = file_hash - self._save_state() - - def _parse_file(self, log_file: Path): - for pattern, func in self.parsers.items(): - if log_file.match(pattern): - 
func(log_file) - return - - raise ValueError(f"No parser registered for file: {log_file}") diff --git a/parsing/handlers/base_parser.py b/parsing/base_parser.py similarity index 100% rename from parsing/handlers/base_parser.py rename to parsing/base_parser.py diff --git a/parsing/handlers/__init__.py b/parsing/handlers/__init__.py deleted file mode 100644 index 28435e3a..00000000 --- a/parsing/handlers/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Log parser handlers for different benchmark types.""" diff --git a/parsing/handlers/coffea_parser.py b/parsing/handlers/coffea_parser.py deleted file mode 100644 index 13921df7..00000000 --- a/parsing/handlers/coffea_parser.py +++ /dev/null @@ -1,66 +0,0 @@ -import datetime as dt - - -def parse_coffea_log(path): - """Parse Coffea analysis log file for timing information. - - Args: - path: Path to coffea_hist.log file - - Returns: - dict: Parsed timing data with keys: - - submitTime: UTC timestamp in milliseconds - - queueTime: Queue time in seconds (always 0) - - runTime: Execution time in seconds - - frequency: Processing frequency in kHz - - status: Exit status (0 = success) - """ - print(f"[Coffea NTuple->Hist] Parsing {path.name}") - - with open(path) as f: - file_lines = f.readlines() - - # Parse execution time and frequency from line 2 - # Format: "... 
execution time: 205.45 s ( 89.10 kHz)" - line_list = file_lines[1].split(" ") - run_time = round(float(line_list[3])) - frequency = round(float(line_list[-2])) - - # Parse UTC timestamp from end of file - # Format: "start_time_utc=2025-12-17T00:48:07.953454Z" - start_time_line = None - - for line in file_lines: - if line.startswith("start_time_utc="): - start_time_line = line.strip() - break - - if not start_time_line: - raise ValueError("No start_time_utc found in log file") - - # Extract timestamp string after the = sign - start_time_str = start_time_line.split("=")[1] - - # Parse ISO 8601 format with Z suffix (UTC) - # Format: 2025-12-17T00:48:07.953454Z - start_dt = dt.datetime.fromisoformat(start_time_str.rstrip("Z")).replace( - tzinfo=dt.timezone.utc - ) - utc_timestamp = int(start_dt.timestamp() * 1000) - - status = 0 - - dicti = { - "submitTime": utc_timestamp, - "queueTime": 0, - "runTime": run_time, - "frequency": frequency, - "status": status, - } - - return dicti - - -# Registers this parsing script with the Class -def register(parser): - parser.register_parsers("coffea_hist.log", parse_coffea_log) diff --git a/parsing/handlers/eventloop_parser.py b/parsing/handlers/eventloop_parser.py deleted file mode 100644 index e7dcd3e2..00000000 --- a/parsing/handlers/eventloop_parser.py +++ /dev/null @@ -1,37 +0,0 @@ -import datetime as dt - - -def parse_eventloop_log(path): - print(f"[EventLoop] Parsing {path.name}") - - with open(path) as f: - lines = f.readlines() - - # EventLoop benchmark info - block = lines[-8:] - - start_datetime = block[1].split("=", 1)[1].strip() - run_time = int(float(block[3].split("=", 1)[1].strip())) - frequency = int(float(block[-3].split("=", 1)[1].strip())) - - start_dt = dt.datetime.fromisoformat(start_datetime.rstrip("Z")).replace( - tzinfo=dt.timezone.utc - ) - utc_timestamp = int(start_dt.timestamp() * 1000) - - status = 0 - - dicti = { - "submitTime": utc_timestamp, - "queueTime": 0, - "runTime": run_time, - "frequency": 
frequency, - "status": status, - } - - return dicti - - -# Registers this parsing script with the Class -def register(parser): - parser.register_parsers("eventloop_arrays.log", parse_eventloop_log) diff --git a/parsing/handlers/evnt_parser.py b/parsing/handlers/evnt_parser.py deleted file mode 100644 index cd2ad17b..00000000 --- a/parsing/handlers/evnt_parser.py +++ /dev/null @@ -1,23 +0,0 @@ -"""EVNT generation log parser. - -Parses logs from ATLAS Monte Carlo event generation jobs to extract timing information. -""" - -from parsing.handlers.base_parser import parse_atlas_log - - -def parse_evnt_log(path): - """Parse EVNT generation log file. - - Args: - path: Path to log.generate file - - Returns: - dict: Parsed timing data - """ - return parse_atlas_log(path, log_name="EVNT") - - -# Registers this parsing script with the Class -def register(parser): - parser.register_parsers("log.generate", parse_evnt_log) diff --git a/parsing/handlers/fastframes_parser.py b/parsing/handlers/fastframes_parser.py deleted file mode 100644 index ff9c48a2..00000000 --- a/parsing/handlers/fastframes_parser.py +++ /dev/null @@ -1,56 +0,0 @@ -import datetime as dt - -from parsing.utils.text_utils import strip_ansi - -date_format = "%Y-%m-%d %H:%M:%S" - - -def elapsed_to_seconds(s): - s = s.rstrip("m") - minutes, seconds = map(int, s.split(":")) - return minutes * 60 + seconds - - -def parse_fastframes_log(path): - print(f"[FastFrames] Parsing {path.name}") - with open(path) as f: - file_lines = f.readlines() - N = len(file_lines) - cleaned_lines = [strip_ansi(lines) for lines in file_lines[N - 2 : N]] - line1 = cleaned_lines[0].split(" ") # processed/total events, elapsed time - elapsed_time = line1[3] - processed_events = int(line1[13]) - if processed_events == 18304905: - status = 0 - else: - status = 1 - - line2 = cleaned_lines[1].split(" ") - date = line2[13] - time = line2[14] - - combined = f"{date} {time}" - - # It was submitted to the batch so it's in UTC TimeZone already - 
dt_obj = dt.datetime.strptime(combined, "%d-%m-%Y %H:%M:%S").replace( - tzinfo=dt.timezone.utc - ) - utc_timestamp = int(dt_obj.timestamp() * 1000) - - run_time = int(elapsed_to_seconds(elapsed_time)) - frequency = int((processed_events / run_time) / 1000) - - dicti = { - "submitTime": utc_timestamp, - "queueTime": 0, - "runTime": run_time, - "frequency": frequency, - "status": status, - } - - return dicti - - -# Registers this parsing script with the Class -def register(parser): - parser.register_parsers("fastframes.log", parse_fastframes_log) diff --git a/parsing/handlers/rucio_parser.py b/parsing/handlers/rucio_parser.py deleted file mode 100644 index d4eacab4..00000000 --- a/parsing/handlers/rucio_parser.py +++ /dev/null @@ -1,65 +0,0 @@ -from collections import deque -import datetime as dt - -from parsing.utils.text_utils import strip_ansi - -date_format = "%Y-%m-%d %H:%M:%S" - - -def parse_rucio_log(path): - print(f"Rucio Parsing {path.name}") - - first_line = None - last_lines = deque(maxlen=12) - - with open(path) as f: - for line in f: - if "Processing 1 item(s) for input" in line and first_line is None: - first_line = line - last_lines.append(line) - - payload_line = last_lines[-1] - last_line = last_lines[0] if len(last_lines) == 12 else None - - first_line = strip_ansi(first_line).split(" ") - start_date_string = first_line[0] - start_time_string = first_line[1].split(",")[0] - - last_line = strip_ansi(last_line).split(" ") - end_date_string = last_line[0] - end_time_string = last_line[1].split(",")[0] - - # Obtaining the payload for status check; casted as int - payload = int(payload_line.split("\t")[0]) - if payload != 0: - status = 0 - else: - status = 1 - - # Creating start and end time objects (explicitly UTC to avoid local timezone interpretation) - start_datetime_string = start_date_string + " " + start_time_string - start_dt = dt.datetime.strptime(start_datetime_string, date_format).replace( - tzinfo=dt.timezone.utc - ) - end_datetime_string = 
end_date_string + " " + end_time_string - end_dt = dt.datetime.strptime(end_datetime_string, date_format).replace( - tzinfo=dt.timezone.utc - ) - - # Obtains timestamp and run_time - utc_timestamp = int(start_dt.timestamp()) * 1000 - run_time = int((end_dt - start_dt).total_seconds()) - - dicti = { - "submitTime": utc_timestamp, - "queueTime": 0, - "runTime": run_time, - "status": status, - } - - return dicti - - -# Registers this parsing script with the Class -def register(parser): - parser.register_parsers("rucio.log", parse_rucio_log) diff --git a/parsing/handlers/truth3_parser.py b/parsing/handlers/truth3_parser.py deleted file mode 100644 index e566e260..00000000 --- a/parsing/handlers/truth3_parser.py +++ /dev/null @@ -1,23 +0,0 @@ -"""TRUTH3 derivation log parser. - -Parses logs from ATLAS TRUTH3 derivation jobs to extract timing information. -""" - -from parsing.handlers.base_parser import parse_atlas_log - - -def parse_truth3_log(path): - """Parse TRUTH3 derivation log file. - - Args: - path: Path to log.EVNTtoDAOD or log.Derivation file - - Returns: - dict: Parsed timing data - """ - return parse_atlas_log(path, log_name="TRUTH3") - - -# Registers this parsing script with the Class -def register(parser): - parser.register_parsers("log.EVNTtoDAOD", parse_truth3_log) diff --git a/parsing/scripts/ci_parse.py b/parsing/scripts/ci_parse.py index 29422a5b..c6056750 100755 --- a/parsing/scripts/ci_parse.py +++ b/parsing/scripts/ci_parse.py @@ -13,7 +13,7 @@ from rich.panel import Panel from rich.syntax import Syntax -from parsing.handlers import base_parser +from parsing import base_parser # Initialize rich console console = Console() diff --git a/parsing/tests/test_parsers.py b/parsing/tests/test_parsers.py index 8863b1bc..266fcc81 100644 --- a/parsing/tests/test_parsers.py +++ b/parsing/tests/test_parsers.py @@ -1,101 +1 @@ -"""Test suite for benchmark log parsers. 
- -This module tests all parsers against example logs to ensure -they correctly extract timing and status information. These tests -serve as a regression suite when refactoring parser code. -""" - -from pathlib import Path - -# Import all parsers -from parsing.handlers.truth3_parser import parse_truth3_log -from parsing.handlers.evnt_parser import parse_evnt_log -from parsing.handlers.rucio_parser import parse_rucio_log -from parsing.handlers.coffea_parser import parse_coffea_log -from parsing.handlers.fastframes_parser import parse_fastframes_log - -# Path to example logs directory -EXAMPLE_LOGS = Path(__file__).parent.parent / "example-logs" - - -class TestTruth3Parser: - """Tests for TRUTH3 derivation log parser.""" - - def test_parse_truth3_example_log(self): - """Test parsing of example TRUTH3 derivation log.""" - log_file = EXAMPLE_LOGS / "log.Derivation" - result = parse_truth3_log(log_file) - - # Validate actual values from example log (not just key existence) - # These expected values come from running parser on example log - # Timestamps are in UTC (logs are from UTC timezone systems) - assert result["submitTime"] == 1765216819000, "Submit time mismatch" - assert result["queueTime"] == 0, "Queue time should be 0" - assert result["runTime"] == 48, "Runtime should be 48 seconds" - assert result["status"] == 0, "Status should be 0 (success)" - - -class TestEvntParser: - """Tests for EVNT generation log parser.""" - - def test_parse_evnt_example_log(self): - """Test parsing of example EVNT generation log.""" - log_file = EXAMPLE_LOGS / "log.generate" - result = parse_evnt_log(log_file) - - # Validate actual values to catch calculation regressions - # Timestamps are in UTC (logs are from UTC timezone systems) - assert result["submitTime"] == 1765216848000, "Submit time mismatch" - assert result["queueTime"] == 0, "Queue time should be 0" - assert result["runTime"] == 2418, "Runtime should be 2418 seconds" - assert result["status"] == 0, "Status should be 0 
(success)" - - -class TestRucioParser: - """Tests for Rucio data download log parser.""" - - def test_parse_rucio_example_log(self): - """Test parsing of example Rucio download log.""" - log_file = EXAMPLE_LOGS / "rucio.log" - result = parse_rucio_log(log_file) - - # Check actual parsed values - # Timestamps are in UTC (logs are from UTC timezone systems) - assert result["submitTime"] == 1765216822000, "Submit time mismatch" - assert result["queueTime"] == 0, "Queue time should be 0" - assert result["runTime"] == 31, "Runtime should be 31 seconds" - assert result["status"] == 0, "Status should be 0 (success)" - - -class TestFastFramesParser: - """Tests for FastFrames analysis log parser.""" - - def test_parse_fastframes_example_log(self): - """Test parsing of example FastFrames log.""" - log_file = EXAMPLE_LOGS / "fastframes.log" - result = parse_fastframes_log(log_file) - - # Check actual parsed values including frequency - # Timestamps are in UTC (logs are from UTC timezone systems) - assert result["submitTime"] == 1765217167000, "Submit time mismatch" - assert result["queueTime"] == 0, "Queue time should be 0" - assert result["runTime"] == 345, "Runtime should be 345 seconds" - assert result["frequency"] == 53, "Frequency should be 53" - assert result["status"] == 0, "Status should be 0 (success)" - - -class TestCoffeaParser: - """Tests for Coffea analysis log parser.""" - - def test_parse_coffea_example_log(self): - """Test parsing of example Coffea log.""" - log_file = EXAMPLE_LOGS / "coffea_hist.log" - result = parse_coffea_log(log_file) - - # Check actual parsed values including frequency - # Timestamps are in UTC (parsed from ISO 8601 format with Z suffix) - assert result["submitTime"] == 1765932487953, "Submit time mismatch" - assert result["queueTime"] == 0, "Queue time should be 0" - assert result["runTime"] == 205, "Runtime should be 205 seconds" - assert result["frequency"] == 89, "Frequency should be 89 kHz" - assert result["status"] == 0, "Status 
should be 0 (success)" +"""Test suite for benchmark log parsers.""" diff --git a/parsing/utils/text_utils.py b/parsing/utils/text_utils.py deleted file mode 100644 index 30fe0e9d..00000000 --- a/parsing/utils/text_utils.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Text processing utilities for log parsing. - -This module provides common text manipulation functions used across -different log parsers. -""" - -import re - - -# Regex pattern to match ANSI escape sequences (color codes, etc.) -ANSI_ESCAPE = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]") - - -def strip_ansi(text): - """Remove ANSI escape sequences from text. - - ANSI escape sequences are used for terminal colors and formatting. - This function removes them to get clean text for parsing. - - Args: - text (str): Text potentially containing ANSI codes - - Returns: - str: Text with ANSI codes removed - - Example: - >>> strip_ansi("\\x1B[32mGreen text\\x1B[0m") - 'Green text' - """ - return ANSI_ESCAPE.sub("", text) From ec31ae8e379c5dc0ee9068b59872cbc1aca382af Mon Sep 17 00:00:00 2001 From: Qi Bin Lei Date: Fri, 8 May 2026 23:25:32 -0700 Subject: [PATCH 07/18] test: add tests for parse_benchmark_block and parse_atlas_log Co-Authored-By: Claude Sonnet 4.6 --- parsing/tests/test_parsers.py | 123 ++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) diff --git a/parsing/tests/test_parsers.py b/parsing/tests/test_parsers.py index 266fcc81..90b4b288 100644 --- a/parsing/tests/test_parsers.py +++ b/parsing/tests/test_parsers.py @@ -1 +1,124 @@ """Test suite for benchmark log parsers.""" + +import pytest + +from parsing.base_parser import parse_atlas_log, parse_benchmark_block + + +class TestParseBenchmarkBlock: + def test_parses_basic_block(self): + lines = [ + "some log line\n", + "=== BENCHMARK ===\n", + "start_time_utc=2025-12-08T18:00:19Z\n", + "end_time_utc=2025-12-08T18:01:07Z\n", + "exit_status=0\n", + "=================\n", + ] + result = parse_benchmark_block(lines) + assert result == { + 
"start_time_utc": "2025-12-08T18:00:19Z", + "end_time_utc": "2025-12-08T18:01:07Z", + "exit_status": "0", + } + + def test_returns_last_block_when_multiple(self): + lines = [ + "=== BENCHMARK ===\n", + "start_time_utc=2025-12-08T18:00:00Z\n", + "end_time_utc=2025-12-08T18:00:30Z\n", + "exit_status=1\n", + "=================\n", + "=== BENCHMARK ===\n", + "start_time_utc=2025-12-08T18:01:00Z\n", + "end_time_utc=2025-12-08T18:01:30Z\n", + "exit_status=0\n", + "=================\n", + ] + result = parse_benchmark_block(lines) + assert result["start_time_utc"] == "2025-12-08T18:01:00Z" + assert result["exit_status"] == "0" + + def test_returns_empty_dict_if_no_block(self): + lines = ["some line\n", "another line\n"] + assert parse_benchmark_block(lines) == {} + + def test_preserves_extra_fields(self): + lines = [ + "=== BENCHMARK ===\n", + "start_time_utc=2025-12-08T18:00:00Z\n", + "end_time_utc=2025-12-08T18:00:30Z\n", + "exit_status=0\n", + "user_time_sec=25.5\n", + "max_rss_kb=512000\n", + "=================\n", + ] + result = parse_benchmark_block(lines) + assert result["user_time_sec"] == "25.5" + assert result["max_rss_kb"] == "512000" + + +class TestParseAtlasLog: + def test_parses_timing_fields(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "some header\n" + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:48Z\n" + "end_time_utc=2025-12-08T18:41:06Z\n" + "exit_status=0\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["submitTime"] == 1765216848000 + assert result["queueTime"] == 0 + assert result["runTime"] == 2418 + assert result["status"] == 0 + + def test_nonzero_exit_status(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:00:30Z\n" + "exit_status=127\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["status"] == 127 + + def 
test_missing_exit_status_defaults_to_zero(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:00:30Z\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["status"] == 0 + + def test_raises_if_no_benchmark_block(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text("some log content without a benchmark block\n") + with pytest.raises(ValueError, match="No BENCHMARK block"): + parse_atlas_log(log_file) + + def test_uses_last_benchmark_block(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:00:10Z\n" + "exit_status=1\n" + "=================\n" + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:01:00Z\n" + "end_time_utc=2025-12-08T18:01:45Z\n" + "exit_status=0\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["submitTime"] == 1765216860000 + assert result["runTime"] == 45 + assert result["status"] == 0 From 262fa3471a5943a45a77dadf2113f9e2dbe6f51f Mon Sep 17 00:00:00 2001 From: Qi Bin Lei Date: Mon, 11 May 2026 09:50:55 -0700 Subject: [PATCH 08/18] feat: add script to compare histogram outputs across frameworks Loads coffea, eventloop, and fastframes photon pT histograms and prints an integral/ratio summary and saves an overlay plot with a ratio panel. Highlights the key difference: coffea/eventloop apply an event-level tightID cut while fastframes fills underflow for events with no tightID photon via sorted ph1_pt1_NOSYS. 
Co-Authored-By: Claude Sonnet 4.6 --- NTuple_Hist/compare_outputs.py | 162 +++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 NTuple_Hist/compare_outputs.py diff --git a/NTuple_Hist/compare_outputs.py b/NTuple_Hist/compare_outputs.py new file mode 100644 index 00000000..0cd0c4e1 --- /dev/null +++ b/NTuple_Hist/compare_outputs.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +"""Compare photon pT histogram outputs across NTuple-to-histogram frameworks. + +Requires: uproot, numpy, matplotlib + pip install uproot numpy matplotlib + +Usage: + python compare_outputs.py \\ + --coffea coffea.root \\ + --eventloop event_loop_noarrays_output_hist.root \\ + --fastframes /srv/output/histograms.root + +FastFrames histogram name: FastFrames uses the convention +{sample}_{region}_{variable}_{systematic}, so for the default BNL config +the histogram is "example_FS_Muon_ph_pt_NOSYS". Override with +--fastframes-hist if your config differs. + +Key difference to look for: coffea and eventloop apply an event-level +tightID cut (skip events with no tightID photon), while FastFrames +fills the underflow with events where the sorted tightID list is empty +(ph1_pt1_NOSYS = -0.999 GeV). The in-range integrals should be close +but FastFrames will have extra entries in the underflow. 
+""" + +import argparse + +import numpy as np +import uproot +import matplotlib.pyplot as plt + + +def load_th1(path, name): + """Return (bin_values, bin_edges) arrays for a TH1 in a ROOT file.""" + with uproot.open(path) as f: + available = list(f.keys()) + if name not in available: + raise KeyError( + f"{name!r} not found in {path}.\nAvailable keys: {available}" + ) + values, edges = f[name].to_numpy() + return values, edges + + +def weighted_integral(values, edges): + return float(np.sum(values * (edges[1:] - edges[:-1]))) + + +def main(): + parser = argparse.ArgumentParser( + description="Compare photon pT histograms across NTuple-to-histogram frameworks", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument("--coffea", required=True, help="coffea output ROOT file") + parser.add_argument( + "--coffea-hist", default="all", metavar="NAME", + help="histogram name in coffea ROOT file (default: all)", + ) + parser.add_argument("--eventloop", required=True, help="eventloop output ROOT file") + parser.add_argument( + "--eventloop-hist", default="baseline_pt_total", metavar="NAME", + help="histogram name in eventloop ROOT file (default: baseline_pt_total)", + ) + parser.add_argument("--fastframes", required=True, help="fastframes output ROOT file") + parser.add_argument( + "--fastframes-hist", default="example_FS_Muon_ph_pt_NOSYS", metavar="NAME", + help="histogram name in fastframes ROOT file (default: example_FS_Muon_ph_pt_NOSYS)", + ) + parser.add_argument( + "--plot", default="comparison.png", metavar="PATH", + help="output plot file (default: comparison.png; pass empty string to skip)", + ) + args = parser.parse_args() + + coffea_vals, coffea_edges = load_th1(args.coffea, args.coffea_hist) + el_vals, el_edges = load_th1(args.eventloop, args.eventloop_hist) + ff_vals, ff_edges = load_th1(args.fastframes, args.fastframes_hist) + + binning_consistent = np.allclose(coffea_edges, el_edges) and np.allclose( + coffea_edges, 
ff_edges + ) + if not binning_consistent: + print("WARNING: histogram binning differs across frameworks") + for label, edges in [ + ("coffea", coffea_edges), + ("eventloop", el_edges), + ("fastframes", ff_edges), + ]: + print(f" {label}: {len(edges) - 1} bins, [{edges[0]:.1f}, {edges[-1]:.1f}]") + + # Summary table + print(f"\n{'Framework':<12} {'Integral':>14} {'Peak bin':>12} {'Non-zero bins':>16}") + print("-" * 58) + for label, vals, edges in [ + ("coffea", coffea_vals, coffea_edges), + ("eventloop", el_vals, el_edges), + ("fastframes", ff_vals, ff_edges), + ]: + print( + f"{label:<12}" + f" {weighted_integral(vals, edges):>14.4f}" + f" {float(np.max(vals)):>12.4f}" + f" {int(np.sum(vals > 0)):>16d}" + ) + + if binning_consistent: + with np.errstate(divide="ignore", invalid="ignore"): + ratio_el = np.where(coffea_vals != 0, el_vals / coffea_vals, np.nan) + ratio_ff = np.where(coffea_vals != 0, ff_vals / coffea_vals, np.nan) + print(f"\nMean bin ratio (where coffea > 0):") + print(f" EventLoop / Coffea : {np.nanmean(ratio_el):.4f}") + print(f" FastFrames / Coffea : {np.nanmean(ratio_ff):.4f}") + + if not args.plot: + return + + fig, (ax_top, ax_bot) = plt.subplots( + 2, 1, figsize=(8, 8), + gridspec_kw={"height_ratios": [3, 1]}, + sharex=True, + ) + + for label, vals, edges, color, ls in [ + ("Coffea", coffea_vals, coffea_edges, "tab:blue", "-"), + ("EventLoop", el_vals, el_edges, "tab:green", "--"), + ("FastFrames", ff_vals, ff_edges, "tab:red", ":"), + ]: + ax_top.stairs(vals, edges, label=label, color=color, linestyle=ls, linewidth=1.5) + + ax_top.set_ylabel("Events / bin") + ax_top.set_yscale("log") + ax_top.legend() + ax_top.set_title(r"Photon $p_\mathrm{T}$: Coffea vs EventLoop vs FastFrames") + ax_top.set_xlim(coffea_edges[0], coffea_edges[-1]) + + if binning_consistent: + ax_bot.axhline(1.0, color="black", linewidth=0.8, linestyle="-") + ax_bot.stairs( + ratio_el, coffea_edges, + label="EventLoop / Coffea", color="tab:green", linestyle="--", 
linewidth=1.5, + ) + ax_bot.stairs( + ratio_ff, coffea_edges, + label="FastFrames / Coffea", color="tab:red", linestyle=":", linewidth=1.5, + ) + ax_bot.set_ylim(0.5, 1.5) + ax_bot.set_ylabel("Ratio to Coffea") + ax_bot.legend(fontsize=9) + else: + ax_bot.text( + 0.5, 0.5, "Binning mismatch — ratio unavailable", + ha="center", va="center", transform=ax_bot.transAxes, + ) + + ax_bot.set_xlabel(r"Photon $p_\mathrm{T}$ [GeV]") + fig.tight_layout() + fig.savefig(args.plot, dpi=150) + print(f"\nPlot saved to: {args.plot}") + + +if __name__ == "__main__": + main() From 46c4bea3786d8a92716925a1ed4fe9a50e53d01c Mon Sep 17 00:00:00 2001 From: Qi Bin Lei Date: Mon, 11 May 2026 10:06:20 -0700 Subject: [PATCH 09/18] refactor: rewrite compare_outputs.py using ROOT instead of matplotlib MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace uproot + numpy + matplotlib with PyROOT — available on all ATLAS analysis facilities without extra installs. Output default changed to PDF (vector, better for publication). Co-Authored-By: Claude Sonnet 4.6 --- NTuple_Hist/compare_outputs.py | 255 +++++++++++++++++++++------------ 1 file changed, 163 insertions(+), 92 deletions(-) diff --git a/NTuple_Hist/compare_outputs.py b/NTuple_Hist/compare_outputs.py index 0cd0c4e1..0696ee3f 100644 --- a/NTuple_Hist/compare_outputs.py +++ b/NTuple_Hist/compare_outputs.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 """Compare photon pT histogram outputs across NTuple-to-histogram frameworks. -Requires: uproot, numpy, matplotlib - pip install uproot numpy matplotlib +Requires: ROOT (PyROOT) — available on ATLAS analysis facilities. 
Usage: python compare_outputs.py \\ @@ -24,25 +23,57 @@ import argparse -import numpy as np -import uproot -import matplotlib.pyplot as plt +import ROOT + +ROOT.gROOT.SetBatch(True) +ROOT.gStyle.SetOptStat(0) +ROOT.gStyle.SetOptTitle(0) def load_th1(path, name): - """Return (bin_values, bin_edges) arrays for a TH1 in a ROOT file.""" - with uproot.open(path) as f: - available = list(f.keys()) - if name not in available: - raise KeyError( - f"{name!r} not found in {path}.\nAvailable keys: {available}" - ) - values, edges = f[name].to_numpy() - return values, edges + """Load a TH1 from a ROOT file, detached from the file.""" + f = ROOT.TFile.Open(path, "READ") + if not f or f.IsZombie(): + raise OSError(f"Cannot open {path}") + h = f.Get(name) + if not h: + keys = [k.GetName() for k in f.GetListOfKeys()] + raise KeyError(f"{name!r} not found in {path}.\nAvailable keys: {keys}") + h = h.Clone() + h.SetDirectory(0) + f.Close() + return h + + +def check_binning(hists): + ref_label, ref_h = hists[0] + ok = True + for label, h in hists[1:]: + if h.GetNbinsX() != ref_h.GetNbinsX() or \ + h.GetXaxis().GetXmin() != ref_h.GetXaxis().GetXmin() or \ + h.GetXaxis().GetXmax() != ref_h.GetXaxis().GetXmax(): + print(f"WARNING: binning of {label!r} differs from {ref_label!r}") + ok = False + return ok + + +def print_summary(hists): + print(f"\n{'Framework':<12} {'Integral':>14} {'Peak bin':>12} {'Non-zero bins':>16}") + print("-" * 58) + for label, h in hists: + integral = h.Integral() + peak = h.GetMaximum() + nonzero = sum( + 1 for i in range(1, h.GetNbinsX() + 1) if h.GetBinContent(i) > 0 + ) + print(f"{label:<12} {integral:>14.4f} {peak:>12.4f} {nonzero:>16d}") -def weighted_integral(values, edges): - return float(np.sum(values * (edges[1:] - edges[:-1]))) +def make_ratio(h_num, h_den, name): + ratio = h_num.Clone(name) + ratio.SetDirectory(0) + ratio.Divide(h_den) + return ratio def main(): @@ -67,94 +98,134 @@ def main(): help="histogram name in fastframes ROOT file (default: 
example_FS_Muon_ph_pt_NOSYS)", ) parser.add_argument( - "--plot", default="comparison.png", metavar="PATH", - help="output plot file (default: comparison.png; pass empty string to skip)", + "--plot", default="comparison.pdf", metavar="PATH", + help="output plot file (default: comparison.pdf; pass empty string to skip)", ) args = parser.parse_args() - coffea_vals, coffea_edges = load_th1(args.coffea, args.coffea_hist) - el_vals, el_edges = load_th1(args.eventloop, args.eventloop_hist) - ff_vals, ff_edges = load_th1(args.fastframes, args.fastframes_hist) + coffea_h = load_th1(args.coffea, args.coffea_hist) + el_h = load_th1(args.eventloop, args.eventloop_hist) + ff_h = load_th1(args.fastframes, args.fastframes_hist) - binning_consistent = np.allclose(coffea_edges, el_edges) and np.allclose( - coffea_edges, ff_edges - ) - if not binning_consistent: - print("WARNING: histogram binning differs across frameworks") - for label, edges in [ - ("coffea", coffea_edges), - ("eventloop", el_edges), - ("fastframes", ff_edges), - ]: - print(f" {label}: {len(edges) - 1} bins, [{edges[0]:.1f}, {edges[-1]:.1f}]") - - # Summary table - print(f"\n{'Framework':<12} {'Integral':>14} {'Peak bin':>12} {'Non-zero bins':>16}") - print("-" * 58) - for label, vals, edges in [ - ("coffea", coffea_vals, coffea_edges), - ("eventloop", el_vals, el_edges), - ("fastframes", ff_vals, ff_edges), - ]: - print( - f"{label:<12}" - f" {weighted_integral(vals, edges):>14.4f}" - f" {float(np.max(vals)):>12.4f}" - f" {int(np.sum(vals > 0)):>16d}" - ) + hists = [("coffea", coffea_h), ("eventloop", el_h), ("fastframes", ff_h)] + + binning_ok = check_binning(hists) + print_summary(hists) - if binning_consistent: - with np.errstate(divide="ignore", invalid="ignore"): - ratio_el = np.where(coffea_vals != 0, el_vals / coffea_vals, np.nan) - ratio_ff = np.where(coffea_vals != 0, ff_vals / coffea_vals, np.nan) + if binning_ok: + ratio_el = make_ratio(el_h, coffea_h, "ratio_el") + ratio_ff = make_ratio(ff_h, 
coffea_h, "ratio_ff") + + mean_el = sum( + ratio_el.GetBinContent(i) + for i in range(1, ratio_el.GetNbinsX() + 1) + if coffea_h.GetBinContent(i) > 0 + ) / max( + 1, + sum(1 for i in range(1, coffea_h.GetNbinsX() + 1) if coffea_h.GetBinContent(i) > 0), + ) + mean_ff = sum( + ratio_ff.GetBinContent(i) + for i in range(1, ratio_ff.GetNbinsX() + 1) + if coffea_h.GetBinContent(i) > 0 + ) / max( + 1, + sum(1 for i in range(1, coffea_h.GetNbinsX() + 1) if coffea_h.GetBinContent(i) > 0), + ) print(f"\nMean bin ratio (where coffea > 0):") - print(f" EventLoop / Coffea : {np.nanmean(ratio_el):.4f}") - print(f" FastFrames / Coffea : {np.nanmean(ratio_ff):.4f}") + print(f" EventLoop / Coffea : {mean_el:.4f}") + print(f" FastFrames / Coffea : {mean_ff:.4f}") if not args.plot: return - fig, (ax_top, ax_bot) = plt.subplots( - 2, 1, figsize=(8, 8), - gridspec_kw={"height_ratios": [3, 1]}, - sharex=True, - ) - - for label, vals, edges, color, ls in [ - ("Coffea", coffea_vals, coffea_edges, "tab:blue", "-"), - ("EventLoop", el_vals, el_edges, "tab:green", "--"), - ("FastFrames", ff_vals, ff_edges, "tab:red", ":"), - ]: - ax_top.stairs(vals, edges, label=label, color=color, linestyle=ls, linewidth=1.5) - - ax_top.set_ylabel("Events / bin") - ax_top.set_yscale("log") - ax_top.legend() - ax_top.set_title(r"Photon $p_\mathrm{T}$: Coffea vs EventLoop vs FastFrames") - ax_top.set_xlim(coffea_edges[0], coffea_edges[-1]) - - if binning_consistent: - ax_bot.axhline(1.0, color="black", linewidth=0.8, linestyle="-") - ax_bot.stairs( - ratio_el, coffea_edges, - label="EventLoop / Coffea", color="tab:green", linestyle="--", linewidth=1.5, - ) - ax_bot.stairs( - ratio_ff, coffea_edges, - label="FastFrames / Coffea", color="tab:red", linestyle=":", linewidth=1.5, - ) - ax_bot.set_ylim(0.5, 1.5) - ax_bot.set_ylabel("Ratio to Coffea") - ax_bot.legend(fontsize=9) + canvas = ROOT.TCanvas("comparison", "Framework Comparison", 800, 800) + pad_top = ROOT.TPad("pad_top", "", 0, 0.3, 1, 1) + pad_bot = 
ROOT.TPad("pad_bot", "", 0, 0, 1, 0.3) + pad_top.SetBottomMargin(0.03) + pad_top.SetTopMargin(0.08) + pad_bot.SetTopMargin(0.03) + pad_bot.SetBottomMargin(0.32) + pad_top.Draw() + pad_bot.Draw() + + # --- top pad --- + pad_top.cd() + pad_top.SetLogy() + + coffea_h.SetLineColor(ROOT.kBlue) + coffea_h.SetLineWidth(2) + coffea_h.GetYaxis().SetTitle("Events / bin") + coffea_h.GetYaxis().SetTitleSize(0.05) + coffea_h.GetYaxis().SetLabelSize(0.04) + coffea_h.GetXaxis().SetLabelSize(0) + coffea_h.Draw("HIST") + + el_h.SetLineColor(ROOT.kGreen + 2) + el_h.SetLineWidth(2) + el_h.SetLineStyle(2) + el_h.Draw("HIST SAME") + + ff_h.SetLineColor(ROOT.kRed) + ff_h.SetLineWidth(2) + ff_h.SetLineStyle(3) + ff_h.Draw("HIST SAME") + + legend = ROOT.TLegend(0.58, 0.68, 0.88, 0.88) + legend.SetBorderSize(0) + legend.AddEntry(coffea_h, "Coffea", "l") + legend.AddEntry(el_h, "EventLoop", "l") + legend.AddEntry(ff_h, "FastFrames", "l") + legend.Draw() + + title_latex = ROOT.TLatex() + title_latex.SetNDC() + title_latex.SetTextSize(0.05) + title_latex.DrawLatex(0.12, 0.93, "Photon p_{T}: Coffea vs EventLoop vs FastFrames") + + # --- bottom pad --- + pad_bot.cd() + + if binning_ok: + ratio_el.SetLineColor(ROOT.kGreen + 2) + ratio_el.SetLineWidth(2) + ratio_el.SetLineStyle(2) + ratio_el.GetYaxis().SetTitle("Ratio to Coffea") + ratio_el.GetYaxis().SetRangeUser(0.5, 1.5) + ratio_el.GetYaxis().SetNdivisions(505) + ratio_el.GetYaxis().SetTitleSize(0.11) + ratio_el.GetYaxis().SetTitleOffset(0.45) + ratio_el.GetYaxis().SetLabelSize(0.09) + ratio_el.GetXaxis().SetTitle("Photon p_{T} [GeV]") + ratio_el.GetXaxis().SetTitleSize(0.12) + ratio_el.GetXaxis().SetLabelSize(0.10) + ratio_el.Draw("HIST") + + ratio_ff.SetLineColor(ROOT.kRed) + ratio_ff.SetLineWidth(2) + ratio_ff.SetLineStyle(3) + ratio_ff.Draw("HIST SAME") + + xmin = coffea_h.GetXaxis().GetXmin() + xmax = coffea_h.GetXaxis().GetXmax() + unity = ROOT.TLine(xmin, 1.0, xmax, 1.0) + unity.SetLineColor(ROOT.kBlack) + unity.SetLineWidth(1) + 
unity.Draw() + + bot_legend = ROOT.TLegend(0.58, 0.78, 0.88, 0.95) + bot_legend.SetBorderSize(0) + bot_legend.SetTextSize(0.09) + bot_legend.AddEntry(ratio_el, "EventLoop / Coffea", "l") + bot_legend.AddEntry(ratio_ff, "FastFrames / Coffea", "l") + bot_legend.Draw() else: - ax_bot.text( - 0.5, 0.5, "Binning mismatch — ratio unavailable", - ha="center", va="center", transform=ax_bot.transAxes, - ) + msg = ROOT.TLatex() + msg.SetNDC() + msg.SetTextSize(0.08) + msg.DrawLatex(0.15, 0.5, "Binning mismatch #font[52]{ratio unavailable}") - ax_bot.set_xlabel(r"Photon $p_\mathrm{T}$ [GeV]") - fig.tight_layout() - fig.savefig(args.plot, dpi=150) + canvas.SaveAs(args.plot) print(f"\nPlot saved to: {args.plot}") From 382850894e8d52392694ee2606bfaa5fc22d8a80 Mon Sep 17 00:00:00 2001 From: Qi Bin Lei Date: Sun, 17 May 2026 16:25:10 -0700 Subject: [PATCH 10/18] feat: add setup and queue timing to all benchmark scripts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - benchmark_utils.sh: reorder append_benchmark params so mode is last (new signature: log_file start end [setup_start] [setup_end] [mode]) and read SUBMIT_TIME from env for queue-time tracking via HTCondor - base_parser.py: parse submit_time_utc (→ submitTime ms, queueTime s) and setup_start/end_time_utc (→ setupTime s) using arrow - payload.schema.json: add optional setupTime field - All 21 run scripts (EVNT, TRUTH3, NTuple_Hist, Rucio): - Container scripts: capture setup_start before export ATLAS_LOCAL_ROOT_BASE, write SETUP_COMPLETE marker inside -r string after asetup/lsetup, grep it after container exits for setup_end - Native scripts: capture setup_start/setup_end around atlasLocalSetup + asetup/lsetup before payload command - Rucio: records setupTime=0 (start_time used for both bounds) Co-Authored-By: Claude Sonnet 4.6 --- EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh | 5 +- EVNT/BNL/EL9/run_evnt_el9_batch.sh | 5 +- EVNT/BNL/Native/run_evnt_native_batch.sh | 6 ++- 
EVNT/UC/CentOS7/run_evnt_centos7_batch.sh | 5 +- EVNT/UC/EL9/run_evnt_el9_batch.sh | 5 +- EVNT/UC/Native/run_evnt_native_batch.sh | 6 ++- NTuple_Hist/coffea/BNL/run_example.sh | 5 +- NTuple_Hist/coffea/UC/run_example.sh | 5 +- .../BNL/columnar/run_eventloop_arrays.sh | 7 ++- .../BNL/standard/run_eventloop_noarrays.sh | 7 ++- .../UC/columnar/run_eventloop_arrays.sh | 6 ++- .../UC/standard/run_eventloop_noarrays.sh | 6 ++- NTuple_Hist/fastframes/BNL/run_fastframes.sh | 5 +- NTuple_Hist/fastframes/UC/run_fastframes.sh | 6 ++- Rucio/rucio_script.sh | 4 +- .../BNL/CentOS7/run_truth3_centos7_batch.sh | 9 ++-- TRUTH3/BNL/EL9/run_truth3_el9_batch.sh | 9 ++-- TRUTH3/BNL/Native/run_truth3_native_batch.sh | 6 ++- TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh | 5 +- TRUTH3/UC/EL9/run_truth3_el9_batch.sh | 5 +- TRUTH3/UC/Native/run_truth3_native_batch.sh | 6 ++- parsing/base_parser.py | 24 ++++++--- parsing/schema/payload.schema.json | 5 ++ parsing/tests/test_parsers.py | 52 +++++++++++++++++++ parsing/utils/benchmark_utils.sh | 24 +++++---- 25 files changed, 187 insertions(+), 41 deletions(-) diff --git a/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh b/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh index b7341d98..d85b9a71 100755 --- a/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh +++ b/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh @@ -9,14 +9,17 @@ start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") ## -c : used to make a container followed by the OS we want to use ## -m : mounts a specific directory ## -r : precedes the commands we want to run within the container +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh -c centos7 -m /atlasgpfs01 -r "asetup AthGeneration,23.6.31,here && export 
LHAPATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current && export LHAPDF_DATA_PATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current &&\ + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ echo $(date -u "+%Y-%m-%dT%H:%M:%SZ") >> split.log &&\ /usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=/atlasgpfs01/usatlas/data/qlei/EVNTJob/100xxx/100001/ --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log &&\ cat pipe_file.log >> log.generate &&\ echo $(date -u "+%Y-%m-%dT%H:%M:%SZ") >> split.log" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Output directory @@ -28,7 +31,7 @@ mkdir -p "${output_dir}" hostname >> split.log du EVNT.root >> split.log -append_benchmark log.generate "${start_time}" "${end_time}" "time_v" +append_benchmark log.generate "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" # Moves the log file to the output directory mv log.generate "${output_dir}" diff --git a/EVNT/BNL/EL9/run_evnt_el9_batch.sh b/EVNT/BNL/EL9/run_evnt_el9_batch.sh index a3ed04b2..f56be707 100755 --- a/EVNT/BNL/EL9/run_evnt_el9_batch.sh +++ b/EVNT/BNL/EL9/run_evnt_el9_batch.sh @@ -12,14 +12,17 @@ OScontainer="el9" ## -c : used to make a container followed by the OS we want to use ## -m : mounts a specific directory ## -r : precedes the commands we want to run within the container +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck 
disable=SC1091 source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh -c ${OScontainer} -m /atlasgpfs01 -r "asetup AthGeneration,23.6.34,here &&\ + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ echo $(date -u "+%Y-%m-%dT%H:%M:%SZ") >> split.log &&\ /usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=/atlasgpfs01/usatlas/data/qlei/EVNTJob/100xxx/100001/ --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log &&\ cat pipe_file.log >> log.generate &&\ echo $(date -u "+%Y-%m-%dT%H:%M:%SZ") >> split.log" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Output directory @@ -31,7 +34,7 @@ mkdir -p "${output_dir}" hostname >> split.log du EVNT.root >> split.log -append_benchmark log.generate "${start_time}" "${end_time}" "time_v" +append_benchmark log.generate "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" # Moves the log file to the output directory mv log.generate "${output_dir}" diff --git a/EVNT/BNL/Native/run_evnt_native_batch.sh b/EVNT/BNL/Native/run_evnt_native_batch.sh index a8d185be..d15ddb0b 100755 --- a/EVNT/BNL/Native/run_evnt_native_batch.sh +++ b/EVNT/BNL/Native/run_evnt_native_batch.sh @@ -8,6 +8,8 @@ start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # The seed used in the job seed=1001 +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Sets up our working environment export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 @@ -16,6 +18,8 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh # Sets up the Ath* version asetup AthGeneration,23.6.34,here +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Appends time before Gen_tf.py to log file date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log @@ -36,7 +40,7 @@ mkdir -p "${output_dir}" hostname >> split.log du EVNT.root >> split.log -append_benchmark log.generate "${start_time}" "${end_time}" "time_v" 
+append_benchmark log.generate "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" # Moves the log file to the output directory mv log.generate "${output_dir}" diff --git a/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh b/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh index e69628ab..1041d1e3 100755 --- a/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh +++ b/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh @@ -15,6 +15,7 @@ start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") config_dir="${GITHUB_WORKSPACE}/EVNT/EVNTFiles/100xxx/100001" # Creates the ATLAS Environment +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # Appends time before Gen_tf.py to log file @@ -29,9 +30,11 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup AthGeneration,23.6.31,here &&\ export LHAPATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current &&\ export LHAPDF_DATA_PATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current &&\ + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ /usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=${config_dir} --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log &&\ cat pipe_file.log >> log.generate" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Appends time after Gen_tf.py to a log file @@ -41,4 +44,4 @@ end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") du EVNT.root } >> split.log 
-append_benchmark "log.generate" "${start_time}" "${end_time}" "time_v" +append_benchmark "log.generate" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/EVNT/UC/EL9/run_evnt_el9_batch.sh b/EVNT/UC/EL9/run_evnt_el9_batch.sh index 2067cf95..2cdfb184 100755 --- a/EVNT/UC/EL9/run_evnt_el9_batch.sh +++ b/EVNT/UC/EL9/run_evnt_el9_batch.sh @@ -14,6 +14,7 @@ start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") config_dir="${GITHUB_WORKSPACE}/EVNT/EVNTFiles/100xxx/100001" # Setting up the working environment +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # Appends time before Gen_tf.py to log file @@ -25,9 +26,11 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c "${OS_container}" -r "asetup AthGeneration,23.6.34,here && \ +echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log && \ /usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=${config_dir} --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=${seed} 2>&1 | tee pipe_file.log && \ cat pipe_file.log >> log.generate" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Appends time after Gen_tf.py to a log file @@ -37,4 +40,4 @@ end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") du EVNT.root } >> split.log -append_benchmark "log.generate" "${start_time}" "${end_time}" "time_v" +append_benchmark "log.generate" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/EVNT/UC/Native/run_evnt_native_batch.sh b/EVNT/UC/Native/run_evnt_native_batch.sh index 8355aae3..2a38bc77 100755 --- a/EVNT/UC/Native/run_evnt_native_batch.sh +++ b/EVNT/UC/Native/run_evnt_native_batch.sh @@ -12,6 +12,8 @@ config_dir="${GITHUB_WORKSPACE}/EVNT/EVNTFiles/100xxx/100001" 
max_events=1000 +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Sets up our working environment echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -25,6 +27,8 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log # Sets up the Ath* version asetup AthGeneration,23.6.34,here +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + echo "::group::EVNT Generation" /usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig="${config_dir}" --outputEVNTFile=EVNT.root --maxEvents="${max_events}" --randomSeed="${seed}" 2>&1 | tee pipe_file.log cat pipe_file.log >> log.generate @@ -41,4 +45,4 @@ echo "::group::Collect Metrics" } >> split.log echo "::endgroup::" -append_benchmark "log.generate" "${start_time}" "${end_time}" "time_v" +append_benchmark "log.generate" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/NTuple_Hist/coffea/BNL/run_example.sh b/NTuple_Hist/coffea/BNL/run_example.sh index c8230897..6416652d 100755 --- a/NTuple_Hist/coffea/BNL/run_example.sh +++ b/NTuple_Hist/coffea/BNL/run_example.sh @@ -18,6 +18,7 @@ fi cp ~/AF-Benchmarking/NTuple_Hist/coffea/BNL/example.py . 
+setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig # shellcheck disable=SC1091 @@ -25,8 +26,10 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -m /atlasgpfs01 python3 -m venv venv &&\ ./venv/bin/python -m pip install -U pip &&\ ./venv/bin/python -m pip install atlas_schema 'dask_awkward!=2026.2.0' &&\ + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ /usr/bin/time -v ./venv/bin/python example.py 2>&1 | tee coffea_hist.log" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") { @@ -39,7 +42,7 @@ output_dir="/atlasgpfs01/usatlas/data/qlei/logs/Coffea_Hist/${start_time}" mkdir -p "${output_dir}" -append_benchmark coffea_hist.log "${start_time}" "${end_time}" "time_v" +append_benchmark coffea_hist.log "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" mv coffea_hist.log "${output_dir}" mv split.log "${output_dir}" diff --git a/NTuple_Hist/coffea/UC/run_example.sh b/NTuple_Hist/coffea/UC/run_example.sh index 64f52f81..74720f3b 100755 --- a/NTuple_Hist/coffea/UC/run_example.sh +++ b/NTuple_Hist/coffea/UC/run_example.sh @@ -9,6 +9,7 @@ start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") #cp ${GITHUB_WORKSPACE}/NTuple_Hist/coffea/UC/example.py . 
# Setting up environment and container +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig # shellcheck disable=SC1091 @@ -16,8 +17,10 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -m /data -r "ls python3 -m venv venv &&\ ./venv/bin/python -m pip install -U pip &&\ ./venv/bin/python -m pip install atlas_schema 'dask_awkward!=2026.2.0' &&\ + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ /usr/bin/time -v ./venv/bin/python ${GITHUB_WORKSPACE}/NTuple_Hist/coffea/UC/example.py 2>&1 | tee coffea_hist.log" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") echo "::group::Collect Metrics" @@ -27,4 +30,4 @@ echo "::group::Collect Metrics" } >> split.log echo "::endgroup::" -append_benchmark "coffea_hist.log" "${start_time}" "${end_time}" "time_v" +append_benchmark "coffea_hist.log" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh b/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh index 9f5cc11b..bfab29ce 100755 --- a/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh +++ b/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh @@ -5,11 +5,16 @@ source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh asetup StatAnalysis,0.6.3 + +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log /usr/bin/time -v python3 
~/AF-Benchmarking/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.py 2>&1 | tee eventloop_arrays.log @@ -30,7 +35,7 @@ echo "End Time: ${end_time}" # Verify the log exists before appending if [ -f eventloop_arrays.log ]; then - append_benchmark eventloop_arrays.log "${start_time}" "${end_time}" "time_v" + append_benchmark eventloop_arrays.log "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" else echo "ERROR: eventloop_arrays.log not found in $(pwd)" fi diff --git a/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh b/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh index 97484b13..7da5e798 100755 --- a/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh +++ b/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh @@ -5,12 +5,17 @@ source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh asetup StatAnalysis,0.6.3 + +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log &&\ /usr/bin/time -v python3 ~/AF-Benchmarking/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.py 2>&1 | tee eventloop_noarrays.log @@ -26,7 +31,7 @@ output_dir="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_noarrays/${start_time} mkdir -p "${output_dir}" -append_benchmark eventloop_noarrays.log "${start_time}" "${end_time}" "time_v" +append_benchmark eventloop_noarrays.log "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" mv eventloop_noarrays.log "${output_dir}" mv split.log "${output_dir}" diff --git a/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh b/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh index 
6c04e145..b0fb3f44 100755 --- a/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh +++ b/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh @@ -4,6 +4,8 @@ source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 @@ -11,6 +13,8 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh echo "::endgroup::" lsetup "views LCG_107a_ATLAS_2 x86_64-el9-gcc13-opt" +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Getting start date date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log @@ -27,4 +31,4 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log hostname >> split.log echo "::endgroup::" -append_benchmark "eventloop_arrays.log" "${start_time}" "${end_time}" "time_v" +append_benchmark "eventloop_arrays.log" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh b/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh index 18696655..a491cb7c 100755 --- a/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh +++ b/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh @@ -4,6 +4,8 @@ source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 @@ -11,6 +13,8 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh echo "::endgroup::" lsetup "views LCG_107a_ATLAS_2 x86_64-el9-gcc13-opt" +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Getting start date date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log @@ -27,4 +31,4 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log hostname >> split.log echo "::endgroup::" -append_benchmark 
"eventloop_noarrays.log" "${start_time}" "${end_time}" "time_v" +append_benchmark "eventloop_noarrays.log" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/NTuple_Hist/fastframes/BNL/run_fastframes.sh b/NTuple_Hist/fastframes/BNL/run_fastframes.sh index 3b9f06b2..7599f1c9 100755 --- a/NTuple_Hist/fastframes/BNL/run_fastframes.sh +++ b/NTuple_Hist/fastframes/BNL/run_fastframes.sh @@ -10,6 +10,7 @@ cd /atlasgpfs01/usatlas/data/qlei/ || exit # Sets up ATLAS environment echo "::group::setupATLAS" +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig # shellcheck disable=SC1091 @@ -20,6 +21,7 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -m /atlasgpfs01/usatlas/data/ -c el9 -r "asetup StatAnalysis,0.6.2 &&\ source /atlasgpfs01/usatlas/data/qlei/FastFramesTutorial/TutorialClass/build/setup.sh &&\ + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ /usr/bin/time -v python3 /atlasgpfs01/usatlas/data/qlei/FastFramesTutorial/FastFrames/python/FastFrames.py -c /atlasgpfs01/usatlas/data/qlei/input/mc20e_example_config.yml 2>&1 | tee fastframes.log" # Getting the date and time after running script @@ -30,6 +32,7 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log hostname >> split.log echo "::endgroup::" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # output directory @@ -38,7 +41,7 @@ output_dir="/atlasgpfs01/usatlas/data/qlei/logs/FastFrames_NTuple/${start_time}" # Creates output dir mkdir -p "${output_dir}" -append_benchmark fastframes.log "${start_time}" "${end_time}" "time_v" +append_benchmark fastframes.log "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" # Moves log to outputdir mv fastframes.log "${output_dir}" diff --git 
a/NTuple_Hist/fastframes/UC/run_fastframes.sh b/NTuple_Hist/fastframes/UC/run_fastframes.sh index 9934d74d..80afa566 100755 --- a/NTuple_Hist/fastframes/UC/run_fastframes.sh +++ b/NTuple_Hist/fastframes/UC/run_fastframes.sh @@ -6,6 +6,8 @@ yml_dir="${GITHUB_WORKSPACE}/NTuple_Hist/fastframes/UC/" start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Sets up our working environment echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -20,6 +22,8 @@ printf "%s" "${VOMS_PASSWORD}" | voms-proxy-init -voms atlas # shellcheck disable=SC1091 source /data/selbor/FastFramesTutorial/TutorialClass/build/setup.sh +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log echo "::group::FastFrames" @@ -44,4 +48,4 @@ if [[ -d "${cleanup_dir}" && "${cleanup_dir}" == "/home/selbor/ntuple/fastframes rm -rf "${cleanup_dir:?}/"* fi -append_benchmark "fastframes.log" "${start_time}" "${end_time}" "time_v" +append_benchmark "fastframes.log" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/Rucio/rucio_script.sh b/Rucio/rucio_script.sh index 4c82e4d4..c931ff24 100755 --- a/Rucio/rucio_script.sh +++ b/Rucio/rucio_script.sh @@ -27,7 +27,7 @@ container_el9 (){ du \"${4#*:}\"/ >> rucio.log &&\ mv rucio.log \"${3}\"" end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") - append_benchmark "${3}/rucio.log" "${start_time}" "${end_time}" "rucio" + append_benchmark "${3}/rucio.log" "${start_time}" "${end_time}" "${start_time}" "${start_time}" "rucio" } native_el9 () { @@ -57,7 +57,7 @@ native_el9 () { hostname >> rucio.log du "${3#*:}" >> rucio.log echo "::endgroup::" - append_benchmark "rucio.log" "${start_time}" "${end_time}" "rucio" + append_benchmark "rucio.log" "${start_time}" "${end_time}" "${start_time}" "${start_time}" "rucio" mv rucio.log "${1}" } diff --git a/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh b/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh 
index 33aa2a59..4149d50b 100755 --- a/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh +++ b/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh @@ -10,6 +10,7 @@ start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") cp -r ~/AF-Benchmarking/TRUTH3/EVNT.root . # Sets up the ATLAS Environment +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # Sets up the container: @@ -18,11 +19,13 @@ export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup AthDerivation,21.2.178.0,here && \ - date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log &&\ + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ + date -u \"+%Y-%m-%dT%H:%M:%SZ\" >> split.log &&\ /usr/bin/time -v Reco_tf.py --inputEVNTFile EVNT.root --outputDAODFile=TRUTH3.root --reductionConf TRUTH3 2>&1 | tee pipe_file.log &&\ cat pipe_file.log >> log.EVNTtoDAOD &&\ - date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log" + date -u \"+%Y-%m-%dT%H:%M:%SZ\" >> split.log" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_centos7_batch/${start_time}" @@ -32,7 +35,7 @@ mkdir -p "${output_dir}" hostname >> split.log du DAOD_TRUTH3.TRUTH3.root >> split.log -append_benchmark log.EVNTtoDAOD "${start_time}" "${end_time}" "time_v" +append_benchmark log.EVNTtoDAOD "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" # Moves the log file to the output directory mv log.EVNTtoDAOD "${output_dir}" diff --git a/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh b/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh index d3394a65..d88daaab 100755 --- a/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh +++ b/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh @@ -9,6 +9,7 @@ start_time=$(date -u 
"+%Y-%m-%dT%H:%M:%SZ") cp -r ~/AF-Benchmarking/TRUTH3/EVNT.root . # Sets up the environment +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # Sets up the container: @@ -17,11 +18,13 @@ export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -r "asetup Athena,24.0.53,here &&\ - date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log &&\ + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ + date -u \"+%Y-%m-%dT%H:%M:%SZ\" >> split.log &&\ /usr/bin/time -v Derivation_tf.py --CA True --inputEVNTFile EVNT.root --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log &&\ cat pipe_file.log >> log.Derivation &&\ - date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log" + date -u \"+%Y-%m-%dT%H:%M:%SZ\" >> split.log" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Defines the output directory @@ -34,7 +37,7 @@ mkdir -p "${output_dir}" hostname >> split.log du DAOD_TRUTH3.TRUTH3.root >> split.log -append_benchmark log.Derivation "${start_time}" "${end_time}" "time_v" +append_benchmark log.Derivation "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" # Moves the log file to the output directory mv log.Derivation "${output_dir}" diff --git a/TRUTH3/BNL/Native/run_truth3_native_batch.sh b/TRUTH3/BNL/Native/run_truth3_native_batch.sh index 0292451b..899cf519 100755 --- a/TRUTH3/BNL/Native/run_truth3_native_batch.sh +++ b/TRUTH3/BNL/Native/run_truth3_native_batch.sh @@ -8,6 +8,8 @@ start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Copying input files to working directory cp -r ~/AF-Benchmarking/TRUTH3/EVNT.root . 
+setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Sets up our environment export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 @@ -16,6 +18,8 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh # Sets the Athena version we want asetup Athena,24.0.53,here +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Appends time before Derivation_tf.py to log file date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log @@ -38,7 +42,7 @@ mkdir -p "${output_dir}" hostname >> split.log du DAOD_TRUTH3.TRUTH3.root >> split.log -append_benchmark log.Derivation "${start_time}" "${end_time}" "truth_v" +append_benchmark log.Derivation "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" "truth_v" # Moves the log file to the output directory mv log.Derivation "${output_dir}" diff --git a/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh b/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh index 073757b4..ae21abd3 100755 --- a/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh +++ b/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh @@ -9,6 +9,7 @@ config_dir="${GITHUB_WORKSPACE}/TRUTH3/EVNT.root" start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Sets up the ATLAS Environment +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # Appends time before Reco_tf.py to log file @@ -20,9 +21,11 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup AthDerivation,21.2.178.0,here && \ + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log && \ /usr/bin/time -v Reco_tf.py --inputEVNTFile ${config_dir} --outputDAODFile=TRUTH3.root --reductionConf TRUTH3 2>&1 | tee pipe_file.log && \ cat pipe_file.log >> log.EVNTtoDAOD" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') 
end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Obtains and appends the host machine and payload size to the log file @@ -33,4 +36,4 @@ end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") du DAOD_TRUTH3.TRUTH3.root } >> split.log -append_benchmark "log.EVNTtoDAOD" "${start_time}" "${end_time}" "time_v" +append_benchmark "log.EVNTtoDAOD" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/TRUTH3/UC/EL9/run_truth3_el9_batch.sh b/TRUTH3/UC/EL9/run_truth3_el9_batch.sh index 80b1a856..45a9fb6f 100755 --- a/TRUTH3/UC/EL9/run_truth3_el9_batch.sh +++ b/TRUTH3/UC/EL9/run_truth3_el9_batch.sh @@ -9,6 +9,7 @@ config_dir="${GITHUB_WORKSPACE}/TRUTH3/EVNT.root" start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Sets up the environment +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # Appends time before Derivation_tf.py to log file @@ -20,9 +21,11 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -r "asetup Athena,24.0.53,here && \ + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log && \ /usr/bin/time -v Derivation_tf.py --CA True --inputEVNTFile ${config_dir} --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log && \ cat pipe_file.log >> log.Derivation" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Obtains and appends the host machine and payload size to the log file @@ -33,4 +36,4 @@ end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") du DAOD_TRUTH3.TRUTH3.root } >> split.log -append_benchmark "log.Derivation" "${start_time}" "${end_time}" "time_v" +append_benchmark "log.Derivation" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/TRUTH3/UC/Native/run_truth3_native_batch.sh 
b/TRUTH3/UC/Native/run_truth3_native_batch.sh index 6510392c..d906dac4 100755 --- a/TRUTH3/UC/Native/run_truth3_native_batch.sh +++ b/TRUTH3/UC/Native/run_truth3_native_batch.sh @@ -8,6 +8,8 @@ config_dir="${GITHUB_WORKSPACE}/TRUTH3/EVNT.root" start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Sets up our environment echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -21,6 +23,8 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log # Sets the Athena version we want asetup Athena,24.0.53,here +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + echo "::group::TRUTH3 Derivation" /usr/bin/time -v Derivation_tf.py --CA True --inputEVNTFile "${config_dir}" --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log cat pipe_file.log >> log.Derivation @@ -41,4 +45,4 @@ echo "::group::Collect Metrics" } >> split.log echo "::endgroup::" -append_benchmark "log.Derivation" "${start_time}" "${end_time}" "time_v" +append_benchmark "log.Derivation" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/parsing/base_parser.py b/parsing/base_parser.py index adc15a12..de9e8934 100644 --- a/parsing/base_parser.py +++ b/parsing/base_parser.py @@ -64,13 +64,25 @@ def parse_atlas_log(path, log_name="ATLAS"): start_dt = arrow.get(benchmark["start_time_utc"]) end_dt = arrow.get(benchmark["end_time_utc"]) - queue_time = 0 - run_time = int((end_dt - start_dt).total_seconds()) - return { - "submitTime": start_dt.int_timestamp * 1000, # milliseconds + if "submit_time_utc" in benchmark: + submit_dt = arrow.get(benchmark["submit_time_utc"]) + submit_time_ms = submit_dt.int_timestamp * 1000 + queue_time = int((start_dt - submit_dt).total_seconds()) + else: + submit_time_ms = start_dt.int_timestamp * 1000 + queue_time = 0 + + result = { + "submitTime": submit_time_ms, "queueTime": queue_time, - "runTime": run_time, + "runTime": int((end_dt - start_dt).total_seconds()), "status": 
int(benchmark.get("exit_status", 0)), - # "benchmark": benchmark, # full block — subparsers can pull extra fields from here } + + if "setup_start_time_utc" in benchmark and "setup_end_time_utc" in benchmark: + setup_start_dt = arrow.get(benchmark["setup_start_time_utc"]) + setup_end_dt = arrow.get(benchmark["setup_end_time_utc"]) + result["setupTime"] = int((setup_end_dt - setup_start_dt).total_seconds()) + + return result diff --git a/parsing/schema/payload.schema.json b/parsing/schema/payload.schema.json index 00544257..d1a743b7 100644 --- a/parsing/schema/payload.schema.json +++ b/parsing/schema/payload.schema.json @@ -55,6 +55,11 @@ "description": "Frequency or event processing rate (kHz)", "minimum": 0 }, + "setupTime": { + "type": "integer", + "description": "Environment setup time in seconds (setupATLAS + asetup/lsetup)", + "minimum": 0 + }, "status": { "type": "integer", "description": "Job exit status (0 = success, non-zero = failure)", diff --git a/parsing/tests/test_parsers.py b/parsing/tests/test_parsers.py index 90b4b288..28e3b0d3 100644 --- a/parsing/tests/test_parsers.py +++ b/parsing/tests/test_parsers.py @@ -122,3 +122,55 @@ def test_uses_last_benchmark_block(self, tmp_path): assert result["submitTime"] == 1765216860000 assert result["runTime"] == 45 assert result["status"] == 0 + + def test_submit_time_utc_sets_submit_time_and_queue_time(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "submit_time_utc=2025-12-08T18:00:00Z\n" + "start_time_utc=2025-12-08T18:05:00Z\n" + "end_time_utc=2025-12-08T18:45:00Z\n" + "exit_status=0\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["submitTime"] == 1765216800000 # submit_time_utc in ms + assert result["queueTime"] == 300 # 5 minutes in queue + assert result["runTime"] == 2400 # 40 minutes running + + def test_missing_submit_time_uses_start_time(self, tmp_path): + log_file = tmp_path / "log.generate" + 
log_file.write_text( + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:00:30Z\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["submitTime"] == 1765216800000 + assert result["queueTime"] == 0 + + def test_setup_times_produce_setup_time(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:30:00Z\n" + "setup_start_time_utc=2025-12-08T18:00:00Z\n" + "setup_end_time_utc=2025-12-08T18:02:30Z\n" + "exit_status=0\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["setupTime"] == 150 # 2.5 minutes in seconds + + def test_missing_setup_times_omits_setup_time(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:00:30Z\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert "setupTime" not in result diff --git a/parsing/utils/benchmark_utils.sh b/parsing/utils/benchmark_utils.sh index eb827cb4..be151ac9 100644 --- a/parsing/utils/benchmark_utils.sh +++ b/parsing/utils/benchmark_utils.sh @@ -58,12 +58,15 @@ extract_rucio_metrics() { } # Append standardized benchmark block to a log file -# Usage: append_benchmark <log_file> <start_time> <end_time> [mode] +# Usage: append_benchmark <log_file> <start_time> <end_time> [setup_start] [setup_end] [mode] +# Reads SUBMIT_TIME from environment if set (injected by cron/submit scripts via HTCondor). 
append_benchmark() { local log_file=$1 local start_time=$2 local end_time=$3 - local mode=${4:-time_v} + local setup_start=${4:-} + local setup_end=${5:-} + local mode=${6:-time_v} local extra_metrics="" case "${mode}" in @@ -72,11 +75,14 @@ append_benchmark() { none) ;; esac - cat >> "${log_file}" <> "${log_file}" } From 5ff45394004865cd24b1f4910e8ac0e1b8a1c513 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 17 May 2026 23:26:33 +0000 Subject: [PATCH 11/18] style: pre-commit fixes --- NTuple_Hist/compare_outputs.py | 50 ++++++++++++++++++++++++---------- parsing/tests/test_parsers.py | 4 +-- 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/NTuple_Hist/compare_outputs.py b/NTuple_Hist/compare_outputs.py index 0696ee3f..54e91dc4 100644 --- a/NTuple_Hist/compare_outputs.py +++ b/NTuple_Hist/compare_outputs.py @@ -49,23 +49,25 @@ def check_binning(hists): ref_label, ref_h = hists[0] ok = True for label, h in hists[1:]: - if h.GetNbinsX() != ref_h.GetNbinsX() or \ - h.GetXaxis().GetXmin() != ref_h.GetXaxis().GetXmin() or \ - h.GetXaxis().GetXmax() != ref_h.GetXaxis().GetXmax(): + if ( + h.GetNbinsX() != ref_h.GetNbinsX() + or h.GetXaxis().GetXmin() != ref_h.GetXaxis().GetXmin() + or h.GetXaxis().GetXmax() != ref_h.GetXaxis().GetXmax() + ): print(f"WARNING: binning of {label!r} differs from {ref_label!r}") ok = False return ok def print_summary(hists): - print(f"\n{'Framework':<12} {'Integral':>14} {'Peak bin':>12} {'Non-zero bins':>16}") + print( + f"\n{'Framework':<12} {'Integral':>14} {'Peak bin':>12} {'Non-zero bins':>16}" + ) print("-" * 58) for label, h in hists: integral = h.Integral() peak = h.GetMaximum() - nonzero = sum( - 1 for i in range(1, h.GetNbinsX() + 1) if h.GetBinContent(i) > 0 - ) + nonzero = sum(1 for i in range(1, h.GetNbinsX() + 1) if h.GetBinContent(i) > 0) print(f"{label:<12} {integral:>14.4f} {peak:>12.4f} {nonzero:>16d}") @@ -84,21 +86,31 @@ def main(): ) 
parser.add_argument("--coffea", required=True, help="coffea output ROOT file") parser.add_argument( - "--coffea-hist", default="all", metavar="NAME", + "--coffea-hist", + default="all", + metavar="NAME", help="histogram name in coffea ROOT file (default: all)", ) parser.add_argument("--eventloop", required=True, help="eventloop output ROOT file") parser.add_argument( - "--eventloop-hist", default="baseline_pt_total", metavar="NAME", + "--eventloop-hist", + default="baseline_pt_total", + metavar="NAME", help="histogram name in eventloop ROOT file (default: baseline_pt_total)", ) - parser.add_argument("--fastframes", required=True, help="fastframes output ROOT file") parser.add_argument( - "--fastframes-hist", default="example_FS_Muon_ph_pt_NOSYS", metavar="NAME", + "--fastframes", required=True, help="fastframes output ROOT file" + ) + parser.add_argument( + "--fastframes-hist", + default="example_FS_Muon_ph_pt_NOSYS", + metavar="NAME", help="histogram name in fastframes ROOT file (default: example_FS_Muon_ph_pt_NOSYS)", ) parser.add_argument( - "--plot", default="comparison.pdf", metavar="PATH", + "--plot", + default="comparison.pdf", + metavar="PATH", help="output plot file (default: comparison.pdf; pass empty string to skip)", ) args = parser.parse_args() @@ -122,7 +134,11 @@ def main(): if coffea_h.GetBinContent(i) > 0 ) / max( 1, - sum(1 for i in range(1, coffea_h.GetNbinsX() + 1) if coffea_h.GetBinContent(i) > 0), + sum( + 1 + for i in range(1, coffea_h.GetNbinsX() + 1) + if coffea_h.GetBinContent(i) > 0 + ), ) mean_ff = sum( ratio_ff.GetBinContent(i) @@ -130,9 +146,13 @@ def main(): if coffea_h.GetBinContent(i) > 0 ) / max( 1, - sum(1 for i in range(1, coffea_h.GetNbinsX() + 1) if coffea_h.GetBinContent(i) > 0), + sum( + 1 + for i in range(1, coffea_h.GetNbinsX() + 1) + if coffea_h.GetBinContent(i) > 0 + ), ) - print(f"\nMean bin ratio (where coffea > 0):") + print("\nMean bin ratio (where coffea > 0):") print(f" EventLoop / Coffea : {mean_el:.4f}") print(f" 
FastFrames / Coffea : {mean_ff:.4f}") diff --git a/parsing/tests/test_parsers.py b/parsing/tests/test_parsers.py index 28e3b0d3..d7c348a0 100644 --- a/parsing/tests/test_parsers.py +++ b/parsing/tests/test_parsers.py @@ -135,8 +135,8 @@ def test_submit_time_utc_sets_submit_time_and_queue_time(self, tmp_path): ) result = parse_atlas_log(log_file) assert result["submitTime"] == 1765216800000 # submit_time_utc in ms - assert result["queueTime"] == 300 # 5 minutes in queue - assert result["runTime"] == 2400 # 40 minutes running + assert result["queueTime"] == 300 # 5 minutes in queue + assert result["runTime"] == 2400 # 40 minutes running def test_missing_submit_time_uses_start_time(self, tmp_path): log_file = tmp_path / "log.generate" From 2227e2c73ee8d7229b7e2172d6d26146023d5ca5 Mon Sep 17 00:00:00 2001 From: Qi Bin Lei Date: Wed, 20 May 2026 16:47:40 -0700 Subject: [PATCH 12/18] fix: clamp queueTime and setupTime to zero on clock skew Slight clock differences between submit host and worker node can produce negative values from timestamp arithmetic, violating the schema's minimum: 0 constraint. Added max(0, ...) guards for both. 
Co-Authored-By: Claude Sonnet 4.6 --- parsing/base_parser.py | 4 ++-- parsing/tests/test_parsers.py | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/parsing/base_parser.py b/parsing/base_parser.py index de9e8934..cb1f642b 100644 --- a/parsing/base_parser.py +++ b/parsing/base_parser.py @@ -68,7 +68,7 @@ def parse_atlas_log(path, log_name="ATLAS"): if "submit_time_utc" in benchmark: submit_dt = arrow.get(benchmark["submit_time_utc"]) submit_time_ms = submit_dt.int_timestamp * 1000 - queue_time = int((start_dt - submit_dt).total_seconds()) + queue_time = max(0, int((start_dt - submit_dt).total_seconds())) else: submit_time_ms = start_dt.int_timestamp * 1000 queue_time = 0 @@ -83,6 +83,6 @@ def parse_atlas_log(path, log_name="ATLAS"): if "setup_start_time_utc" in benchmark and "setup_end_time_utc" in benchmark: setup_start_dt = arrow.get(benchmark["setup_start_time_utc"]) setup_end_dt = arrow.get(benchmark["setup_end_time_utc"]) - result["setupTime"] = int((setup_end_dt - setup_start_dt).total_seconds()) + result["setupTime"] = max(0, int((setup_end_dt - setup_start_dt).total_seconds())) return result diff --git a/parsing/tests/test_parsers.py b/parsing/tests/test_parsers.py index d7c348a0..22bd29bc 100644 --- a/parsing/tests/test_parsers.py +++ b/parsing/tests/test_parsers.py @@ -174,3 +174,28 @@ def test_missing_setup_times_omits_setup_time(self, tmp_path): ) result = parse_atlas_log(log_file) assert "setupTime" not in result + + def test_clock_skew_on_queue_time_clamps_to_zero(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "submit_time_utc=2025-12-08T18:00:02Z\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:00:30Z\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["queueTime"] == 0 + + def test_clock_skew_on_setup_time_clamps_to_zero(self, tmp_path): + log_file = tmp_path / "log.generate" + 
log_file.write_text( + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:30:00Z\n" + "setup_start_time_utc=2025-12-08T18:00:01Z\n" + "setup_end_time_utc=2025-12-08T18:00:00Z\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["setupTime"] == 0 From 7321344f802bd87ede778b59e2e6dd2dadaac4ab Mon Sep 17 00:00:00 2001 From: Qi Bin Lei Date: Wed, 20 May 2026 16:49:27 -0700 Subject: [PATCH 13/18] fix: update BNL benchmark path to AF-Benchmarking Co-Authored-By: Claude Sonnet 4.6 --- EVNT/BNL/CentOS7/centos_cron.sh | 2 +- EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh | 2 +- EVNT/BNL/EL9/el_cron.sh | 2 +- EVNT/BNL/EL9/run_evnt_el9_batch.sh | 2 +- EVNT/BNL/Native/native_cron.sh | 2 +- EVNT/BNL/Native/run_evnt_native_batch.sh | 2 +- NTuple_Hist/coffea/BNL/cron_example.sh | 2 +- NTuple_Hist/coffea/BNL/run_example.sh | 2 +- NTuple_Hist/event_loop/BNL/columnar/cron_eventloop_arrays.sh | 2 +- NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh | 2 +- NTuple_Hist/event_loop/BNL/standard/cron_eventloop_noarrays.sh | 2 +- NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh | 2 +- NTuple_Hist/fastframes/BNL/crontab_fastframes.sh | 2 +- NTuple_Hist/fastframes/BNL/run_fastframes.sh | 2 +- Rucio/cron_rucio_bnl.sh | 2 +- Rucio/rucio_script.sh | 2 +- TRUTH3/BNL/CentOS7/cron_centos_batch.sh | 2 +- TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh | 2 +- TRUTH3/BNL/EL9/cron_el_batch.sh | 2 +- TRUTH3/BNL/EL9/run_truth3_el9_batch.sh | 2 +- TRUTH3/BNL/Native/cron_native_batch.sh | 2 +- TRUTH3/BNL/Native/run_truth3_native_batch.sh | 2 +- 22 files changed, 22 insertions(+), 22 deletions(-) diff --git a/EVNT/BNL/CentOS7/centos_cron.sh b/EVNT/BNL/CentOS7/centos_cron.sh index 96f48726..f22bbbc5 100644 --- a/EVNT/BNL/CentOS7/centos_cron.sh +++ b/EVNT/BNL/CentOS7/centos_cron.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_centos7_batch" readonly log_output="log.generate" readonly 
job_dir="/usatlas/u/qlei/test/EVNT/centos" -readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" readonly sub_file="${AF_BENCH_DIR}/EVNT/BNL/CentOS7/evnt_centos.sub" readonly pixi_job="evnt" diff --git a/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh b/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh index d85b9a71..01b43d6a 100755 --- a/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh +++ b/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh @@ -1,6 +1,6 @@ #!/bin/bash -source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # Current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") diff --git a/EVNT/BNL/EL9/el_cron.sh b/EVNT/BNL/EL9/el_cron.sh index a8856a55..e372afb5 100644 --- a/EVNT/BNL/EL9/el_cron.sh +++ b/EVNT/BNL/EL9/el_cron.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_el9_batch" readonly log_output="log.generate" readonly job_dir="/usatlas/u/qlei/test/EVNT/el" -readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" readonly sub_file="${AF_BENCH_DIR}/EVNT/BNL/EL9/evnt_el.sub" readonly pixi_job="evnt" diff --git a/EVNT/BNL/EL9/run_evnt_el9_batch.sh b/EVNT/BNL/EL9/run_evnt_el9_batch.sh index f56be707..94ce1f57 100755 --- a/EVNT/BNL/EL9/run_evnt_el9_batch.sh +++ b/EVNT/BNL/EL9/run_evnt_el9_batch.sh @@ -1,6 +1,6 @@ #!/bin/bash -source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") diff --git a/EVNT/BNL/Native/native_cron.sh b/EVNT/BNL/Native/native_cron.sh index 62116147..49931c4f 100644 --- a/EVNT/BNL/Native/native_cron.sh +++ b/EVNT/BNL/Native/native_cron.sh @@ -4,7 +4,7 @@ readonly 
log_base="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_native_batch" readonly log_output="log.generate" readonly job_dir="/usatlas/u/qlei/test/EVNT/native" -readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" readonly sub_file="${AF_BENCH_DIR}/EVNT/BNL/Native/evnt_native.sub" readonly pixi_job="evnt" diff --git a/EVNT/BNL/Native/run_evnt_native_batch.sh b/EVNT/BNL/Native/run_evnt_native_batch.sh index d15ddb0b..6e860bf7 100755 --- a/EVNT/BNL/Native/run_evnt_native_batch.sh +++ b/EVNT/BNL/Native/run_evnt_native_batch.sh @@ -1,6 +1,6 @@ #!/bin/bash -source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") diff --git a/NTuple_Hist/coffea/BNL/cron_example.sh b/NTuple_Hist/coffea/BNL/cron_example.sh index 2807237d..8670c581 100644 --- a/NTuple_Hist/coffea/BNL/cron_example.sh +++ b/NTuple_Hist/coffea/BNL/cron_example.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/Coffea_Hist" readonly log_output="coffea_hist.log" readonly job_dir="/usatlas/u/qlei/test/coffea" -readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" readonly sub_file="${AF_BENCH_DIR}/NTuple_Hist/coffea/BNL/coffea_el9.sub" readonly pixi_job="coffea" diff --git a/NTuple_Hist/coffea/BNL/run_example.sh b/NTuple_Hist/coffea/BNL/run_example.sh index 6416652d..5997f67c 100755 --- a/NTuple_Hist/coffea/BNL/run_example.sh +++ b/NTuple_Hist/coffea/BNL/run_example.sh @@ -1,6 +1,6 @@ #!/bin/bash -source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage diff --git a/NTuple_Hist/event_loop/BNL/columnar/cron_eventloop_arrays.sh 
b/NTuple_Hist/event_loop/BNL/columnar/cron_eventloop_arrays.sh index 9e5fa2a3..d5e29cd0 100644 --- a/NTuple_Hist/event_loop/BNL/columnar/cron_eventloop_arrays.sh +++ b/NTuple_Hist/event_loop/BNL/columnar/cron_eventloop_arrays.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_arrays" readonly log_output="eventloop_arrays.log" readonly job_dir="/usatlas/u/qlei/test/eventloop_arrays" -readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" readonly sub_file="${AF_BENCH_DIR}/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.sub" readonly pixi_job="eventloop-columnar" diff --git a/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh b/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh index bfab29ce..07269751 100755 --- a/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh +++ b/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh @@ -1,6 +1,6 @@ #!/bin/bash -source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") diff --git a/NTuple_Hist/event_loop/BNL/standard/cron_eventloop_noarrays.sh b/NTuple_Hist/event_loop/BNL/standard/cron_eventloop_noarrays.sh index ef47065c..47caccc2 100644 --- a/NTuple_Hist/event_loop/BNL/standard/cron_eventloop_noarrays.sh +++ b/NTuple_Hist/event_loop/BNL/standard/cron_eventloop_noarrays.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_noarrays" readonly log_output="eventloop_noarrays.log" readonly job_dir="/usatlas/u/qlei/test/eventloop_noarrays" -readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" readonly sub_file="${AF_BENCH_DIR}/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.sub" readonly pixi_job="eventloop-standard" diff --git 
a/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh b/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh index 7da5e798..00a454ce 100755 --- a/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh +++ b/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh @@ -1,6 +1,6 @@ #!/bin/bash -source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") diff --git a/NTuple_Hist/fastframes/BNL/crontab_fastframes.sh b/NTuple_Hist/fastframes/BNL/crontab_fastframes.sh index b6599a7b..a8ca022f 100644 --- a/NTuple_Hist/fastframes/BNL/crontab_fastframes.sh +++ b/NTuple_Hist/fastframes/BNL/crontab_fastframes.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/FastFrames_NTuple" readonly log_output="fastframes.log" readonly job_dir="/usatlas/u/qlei/test/fastframes" -readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" readonly sub_file="${AF_BENCH_DIR}/NTuple_Hist/fastframes/BNL/fastframes_el9.sub" readonly pixi_job="fastframes" diff --git a/NTuple_Hist/fastframes/BNL/run_fastframes.sh b/NTuple_Hist/fastframes/BNL/run_fastframes.sh index 7599f1c9..22f7d075 100755 --- a/NTuple_Hist/fastframes/BNL/run_fastframes.sh +++ b/NTuple_Hist/fastframes/BNL/run_fastframes.sh @@ -1,6 +1,6 @@ #!/bin/bash -source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage diff --git a/Rucio/cron_rucio_bnl.sh b/Rucio/cron_rucio_bnl.sh index 447b67a0..53140a6d 100644 --- a/Rucio/cron_rucio_bnl.sh +++ b/Rucio/cron_rucio_bnl.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/Rucio" readonly log_output="rucio.log" readonly job_dir="/usatlas/u/qlei/test/Rucio" 
-readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" readonly sub_file="${AF_BENCH_DIR}/Rucio/rucio_el.sub" readonly pixi_job="rucio" diff --git a/Rucio/rucio_script.sh b/Rucio/rucio_script.sh index c931ff24..f7bb31e2 100755 --- a/Rucio/rucio_script.sh +++ b/Rucio/rucio_script.sh @@ -88,7 +88,7 @@ case "$site" in job_dir="/usatlas/u/qlei/test/Rucio/" dir_mount="/atlasgpfs01/usatlas/data/" output_dir="/atlasgpfs01/usatlas/data/qlei/logs/Rucio/${curr_time}/" - AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" + AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" source ${AF_BENCH_DIR}/parsing/utils/benchmark_utils.sh container_el9 "$job_dir" "$dir_mount" "$output_dir" "$download_ID" ;; diff --git a/TRUTH3/BNL/CentOS7/cron_centos_batch.sh b/TRUTH3/BNL/CentOS7/cron_centos_batch.sh index f0bc2281..ff3d4377 100644 --- a/TRUTH3/BNL/CentOS7/cron_centos_batch.sh +++ b/TRUTH3/BNL/CentOS7/cron_centos_batch.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_centos7_batch" readonly log_output="log.EVNTtoDAOD" readonly job_dir="/usatlas/u/qlei/test/TRUTH3/centos" -readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" readonly sub_file="${AF_BENCH_DIR}/TRUTH3/BNL/CentOS7/truth3_centos.sub" readonly pixi_job="truth3" diff --git a/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh b/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh index 4149d50b..349950d7 100755 --- a/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh +++ b/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh @@ -1,6 +1,6 @@ #!/bin/bash -source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage diff --git a/TRUTH3/BNL/EL9/cron_el_batch.sh b/TRUTH3/BNL/EL9/cron_el_batch.sh index 489379ab..071b8ea4 100644 --- a/TRUTH3/BNL/EL9/cron_el_batch.sh +++ 
b/TRUTH3/BNL/EL9/cron_el_batch.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_el9_batch" readonly log_output="log.Derivation" readonly job_dir="/usatlas/u/qlei/test/TRUTH3/el" -readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" readonly sub_file="${AF_BENCH_DIR}/TRUTH3/BNL/EL9/truth3_el.sub" readonly pixi_job="truth3" diff --git a/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh b/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh index d88daaab..af993980 100755 --- a/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh +++ b/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh @@ -1,6 +1,6 @@ #!/bin/bash -source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # Current time used for file storage start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") diff --git a/TRUTH3/BNL/Native/cron_native_batch.sh b/TRUTH3/BNL/Native/cron_native_batch.sh index 55183ba5..b6476f8a 100644 --- a/TRUTH3/BNL/Native/cron_native_batch.sh +++ b/TRUTH3/BNL/Native/cron_native_batch.sh @@ -4,7 +4,7 @@ readonly log_base="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_native_batch" readonly log_output="log.Derivation" readonly job_dir="/usatlas/u/qlei/test/TRUTH3/native" -readonly AF_BENCH_DIR="/usatlas/u/qlei/dev/af-benchmarking" +readonly AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" readonly sub_file="${AF_BENCH_DIR}/TRUTH3/BNL/Native/truth3_native.sub" readonly pixi_job="truth3" diff --git a/TRUTH3/BNL/Native/run_truth3_native_batch.sh b/TRUTH3/BNL/Native/run_truth3_native_batch.sh index 899cf519..7c38d64e 100755 --- a/TRUTH3/BNL/Native/run_truth3_native_batch.sh +++ b/TRUTH3/BNL/Native/run_truth3_native_batch.sh @@ -1,6 +1,6 @@ #!/bin/bash -source /usatlas/u/qlei/dev/af-benchmarking/parsing/utils/benchmark_utils.sh +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage start_time=$(date -u 
"+%Y-%m-%dT%H:%M:%SZ") From bf36ca931bb0c78701036df580e0354d68890e8e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 21 May 2026 00:12:33 +0000 Subject: [PATCH 14/18] style: pre-commit fixes --- parsing/base_parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/parsing/base_parser.py b/parsing/base_parser.py index cb1f642b..6597e2f6 100644 --- a/parsing/base_parser.py +++ b/parsing/base_parser.py @@ -83,6 +83,8 @@ def parse_atlas_log(path, log_name="ATLAS"): if "setup_start_time_utc" in benchmark and "setup_end_time_utc" in benchmark: setup_start_dt = arrow.get(benchmark["setup_start_time_utc"]) setup_end_dt = arrow.get(benchmark["setup_end_time_utc"]) - result["setupTime"] = max(0, int((setup_end_dt - setup_start_dt).total_seconds())) + result["setupTime"] = max( + 0, int((setup_end_dt - setup_start_dt).total_seconds()) + ) return result From 242285b7c47ad85b790d305a85e1cc9716754543 Mon Sep 17 00:00:00 2001 From: Qi Bin Lei Date: Wed, 20 May 2026 18:20:38 -0700 Subject: [PATCH 15/18] fix: suppress SC1091 shellcheck warnings in BNL batch scripts The sourced benchmark_utils.sh uses an absolute cluster path that doesn't exist locally, causing false SC1091 failures in shellcheck. 
Co-Authored-By: Claude Sonnet 4.6 --- EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh | 1 + EVNT/BNL/EL9/run_evnt_el9_batch.sh | 1 + EVNT/BNL/Native/run_evnt_native_batch.sh | 1 + NTuple_Hist/coffea/BNL/run_example.sh | 1 + NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh | 1 + NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh | 1 + NTuple_Hist/fastframes/BNL/run_fastframes.sh | 1 + TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh | 1 + TRUTH3/BNL/EL9/run_truth3_el9_batch.sh | 1 + TRUTH3/BNL/Native/run_truth3_native_batch.sh | 1 + 10 files changed, 10 insertions(+) diff --git a/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh b/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh index 01b43d6a..7f38aaee 100755 --- a/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh +++ b/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh @@ -1,5 +1,6 @@ #!/bin/bash +# shellcheck disable=SC1091 source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # Current time used for log file storage diff --git a/EVNT/BNL/EL9/run_evnt_el9_batch.sh b/EVNT/BNL/EL9/run_evnt_el9_batch.sh index 94ce1f57..d099dc65 100755 --- a/EVNT/BNL/EL9/run_evnt_el9_batch.sh +++ b/EVNT/BNL/EL9/run_evnt_el9_batch.sh @@ -1,5 +1,6 @@ #!/bin/bash +# shellcheck disable=SC1091 source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage diff --git a/EVNT/BNL/Native/run_evnt_native_batch.sh b/EVNT/BNL/Native/run_evnt_native_batch.sh index 6e860bf7..68d5d1d8 100755 --- a/EVNT/BNL/Native/run_evnt_native_batch.sh +++ b/EVNT/BNL/Native/run_evnt_native_batch.sh @@ -1,5 +1,6 @@ #!/bin/bash +# shellcheck disable=SC1091 source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage diff --git a/NTuple_Hist/coffea/BNL/run_example.sh b/NTuple_Hist/coffea/BNL/run_example.sh index 5997f67c..66e7cdeb 100755 --- a/NTuple_Hist/coffea/BNL/run_example.sh +++ b/NTuple_Hist/coffea/BNL/run_example.sh @@ -1,5 +1,6 @@ #!/bin/bash +# 
shellcheck disable=SC1091 source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage diff --git a/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh b/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh index 07269751..7cb964bc 100755 --- a/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh +++ b/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh @@ -1,5 +1,6 @@ #!/bin/bash +# shellcheck disable=SC1091 source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage diff --git a/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh b/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh index 00a454ce..8ff6f74e 100755 --- a/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh +++ b/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh @@ -1,5 +1,6 @@ #!/bin/bash +# shellcheck disable=SC1091 source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage diff --git a/NTuple_Hist/fastframes/BNL/run_fastframes.sh b/NTuple_Hist/fastframes/BNL/run_fastframes.sh index 22f7d075..69a6be30 100755 --- a/NTuple_Hist/fastframes/BNL/run_fastframes.sh +++ b/NTuple_Hist/fastframes/BNL/run_fastframes.sh @@ -1,5 +1,6 @@ #!/bin/bash +# shellcheck disable=SC1091 source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage diff --git a/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh b/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh index 349950d7..cb8308e3 100755 --- a/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh +++ b/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh @@ -1,5 +1,6 @@ #!/bin/bash +# shellcheck disable=SC1091 source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage diff --git a/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh 
b/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh index af993980..ed3b9585 100755 --- a/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh +++ b/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh @@ -1,5 +1,6 @@ #!/bin/bash +# shellcheck disable=SC1091 source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # Current time used for file storage diff --git a/TRUTH3/BNL/Native/run_truth3_native_batch.sh b/TRUTH3/BNL/Native/run_truth3_native_batch.sh index 7c38d64e..97936eb5 100755 --- a/TRUTH3/BNL/Native/run_truth3_native_batch.sh +++ b/TRUTH3/BNL/Native/run_truth3_native_batch.sh @@ -1,5 +1,6 @@ #!/bin/bash +# shellcheck disable=SC1091 source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh # current time used for log file storage From 27090cd5e8888ef17d1ad41e4a80781edb11fbdb Mon Sep 17 00:00:00 2001 From: Qi Bin Lei Date: Thu, 21 May 2026 15:28:59 -0700 Subject: [PATCH 16/18] fix: align BNL .sub Executable paths with AF_BENCH_DIR install location The 11 HTCondor .sub files pointed Executable at /usatlas/u/qlei/dev/af-benchmarking/... while every BNL shell script sourced and referenced /usatlas/u/qlei/AF-Benchmarking/. This mismatch meant HTCondor launched executables from a different checkout than the one the scripts internally relied on. 
Co-Authored-By: Claude Opus 4.7 --- EVNT/BNL/CentOS7/evnt_centos.sub | 2 +- EVNT/BNL/EL9/evnt_el.sub | 2 +- EVNT/BNL/Native/evnt_native.sub | 2 +- NTuple_Hist/coffea/BNL/coffea_el9.sub | 2 +- NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.sub | 2 +- NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.sub | 2 +- NTuple_Hist/fastframes/BNL/fastframes_el9.sub | 2 +- Rucio/rucio_el.sub | 2 +- TRUTH3/BNL/CentOS7/truth3_centos.sub | 2 +- TRUTH3/BNL/EL9/truth3_el.sub | 2 +- TRUTH3/BNL/Native/truth3_native.sub | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/EVNT/BNL/CentOS7/evnt_centos.sub b/EVNT/BNL/CentOS7/evnt_centos.sub index c38e866c..a8537d15 100644 --- a/EVNT/BNL/CentOS7/evnt_centos.sub +++ b/EVNT/BNL/CentOS7/evnt_centos.sub @@ -4,7 +4,7 @@ Output = /usatlas/u/qlei/batch_output_files/evnt/centos/myjob.$(Cluster).$(Proce Error = /usatlas/u/qlei/batch_output_files/evnt/centos/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/evnt/centos/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/dev/af-benchmarking/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh +Executable = /usatlas/u/qlei/AF-Benchmarking/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh request_memory = 3GB request_cpus = 1 diff --git a/EVNT/BNL/EL9/evnt_el.sub b/EVNT/BNL/EL9/evnt_el.sub index 5615da6d..580360b2 100644 --- a/EVNT/BNL/EL9/evnt_el.sub +++ b/EVNT/BNL/EL9/evnt_el.sub @@ -4,7 +4,7 @@ Output = /usatlas/u/qlei/batch_output_files/evnt/el/myjob.$(Cluster).$(Process). 
Error = /usatlas/u/qlei/batch_output_files/evnt/el/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/evnt/el/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/dev/af-benchmarking/EVNT/BNL/EL9/run_evnt_el9_batch.sh +Executable = /usatlas/u/qlei/AF-Benchmarking/EVNT/BNL/EL9/run_evnt_el9_batch.sh request_memory = 3GB request_cpus = 1 diff --git a/EVNT/BNL/Native/evnt_native.sub b/EVNT/BNL/Native/evnt_native.sub index 10084a33..63de0439 100644 --- a/EVNT/BNL/Native/evnt_native.sub +++ b/EVNT/BNL/Native/evnt_native.sub @@ -4,7 +4,7 @@ Output = /usatlas/u/qlei/batch_output_files/evnt/native/myjob.$(Cluster).$(Proce Error = /usatlas/u/qlei/batch_output_files/evnt/native/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/evnt/native/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/dev/af-benchmarking/EVNT/BNL/Native/run_evnt_native_batch.sh +Executable = /usatlas/u/qlei/AF-Benchmarking/EVNT/BNL/Native/run_evnt_native_batch.sh request_memory = 3GB request_cpus = 1 diff --git a/NTuple_Hist/coffea/BNL/coffea_el9.sub b/NTuple_Hist/coffea/BNL/coffea_el9.sub index 7ee91ec5..d030860d 100755 --- a/NTuple_Hist/coffea/BNL/coffea_el9.sub +++ b/NTuple_Hist/coffea/BNL/coffea_el9.sub @@ -4,7 +4,7 @@ Output = /usatlas/u/qlei/batch_output_files/coffea/myjob.$(Cluster).$(Process).o Error = /usatlas/u/qlei/batch_output_files/coffea/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/coffea/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/dev/af-benchmarking/NTuple_Hist/coffea/BNL/run_example.sh +Executable = /usatlas/u/qlei/AF-Benchmarking/NTuple_Hist/coffea/BNL/run_example.sh request_memory = 16GB request_cpus = 4 diff --git a/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.sub b/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.sub index 8c85da41..868050d8 100644 --- a/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.sub +++ 
b/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.sub @@ -4,7 +4,7 @@ Output = /usatlas/u/qlei/batch_output_files/eventloop_arrays/eventloop_arrays.$( Error = /usatlas/u/qlei/batch_output_files/eventloop_arrays/eventloop_arrays.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/eventloop_arrays/eventloop_arrays.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/dev/af-benchmarking/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh +Executable = /usatlas/u/qlei/AF-Benchmarking/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh request_memory = 3GB request_cpus = 1 diff --git a/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.sub b/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.sub index 4554be82..83cf600f 100644 --- a/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.sub +++ b/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.sub @@ -4,7 +4,7 @@ Output = /usatlas/u/qlei/batch_output_files/eventloop/myjob.$(Cluster).$(Process Error = /usatlas/u/qlei/batch_output_files/eventloop/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/eventloop/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/dev/af-benchmarking/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh +Executable = /usatlas/u/qlei/AF-Benchmarking/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh request_memory = 3GB request_cpus = 1 diff --git a/NTuple_Hist/fastframes/BNL/fastframes_el9.sub b/NTuple_Hist/fastframes/BNL/fastframes_el9.sub index 1545050f..8fadba3a 100644 --- a/NTuple_Hist/fastframes/BNL/fastframes_el9.sub +++ b/NTuple_Hist/fastframes/BNL/fastframes_el9.sub @@ -5,7 +5,7 @@ Output = /usatlas/u/qlei/batch_output_files/fastframes/myjob.$(Cluster).$(Proces Error = /usatlas/u/qlei/batch_output_files/fastframes/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/fastframes/myjob.$(Cluster).$(Process).log -Executable = 
/usatlas/u/qlei/dev/af-benchmarking/NTuple_Hist/fastframes/BNL/run_fastframes.sh +Executable = /usatlas/u/qlei/AF-Benchmarking/NTuple_Hist/fastframes/BNL/run_fastframes.sh request_memory = 3GB request_cpus = 1 diff --git a/Rucio/rucio_el.sub b/Rucio/rucio_el.sub index 7fa6ad79..ef1a98c6 100644 --- a/Rucio/rucio_el.sub +++ b/Rucio/rucio_el.sub @@ -4,7 +4,7 @@ Output = /usatlas/u/qlei/batch_output_files/rucio/myjob.$(Cluster).$(Process).ou Error = /usatlas/u/qlei/batch_output_files/rucio/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/rucio/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/dev/af-benchmarking/Rucio/rucio_script.sh +Executable = /usatlas/u/qlei/AF-Benchmarking/Rucio/rucio_script.sh Arguments = bnl request_memory = 16GB diff --git a/TRUTH3/BNL/CentOS7/truth3_centos.sub b/TRUTH3/BNL/CentOS7/truth3_centos.sub index 622c065e..ef8a4632 100644 --- a/TRUTH3/BNL/CentOS7/truth3_centos.sub +++ b/TRUTH3/BNL/CentOS7/truth3_centos.sub @@ -4,7 +4,7 @@ Output = /usatlas/u/qlei/batch_output_files/truth3/centos/myjob.$(Cluster).$(Pro Error = /usatlas/u/qlei/batch_output_files/truth3/centos/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/truth3/centos/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/dev/af-benchmarking/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh +Executable = /usatlas/u/qlei/AF-Benchmarking/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh request_memory = 3G diff --git a/TRUTH3/BNL/EL9/truth3_el.sub b/TRUTH3/BNL/EL9/truth3_el.sub index 5add79ef..a5629287 100644 --- a/TRUTH3/BNL/EL9/truth3_el.sub +++ b/TRUTH3/BNL/EL9/truth3_el.sub @@ -4,7 +4,7 @@ Output = /usatlas/u/qlei/batch_output_files/truth3/el/myjob.$(Cluster).$(Process Error = /usatlas/u/qlei/batch_output_files/truth3/el/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/truth3/el/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/dev/af-benchmarking/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh 
+Executable = /usatlas/u/qlei/AF-Benchmarking/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh request_memory = 3GB request_cpus = 1 diff --git a/TRUTH3/BNL/Native/truth3_native.sub b/TRUTH3/BNL/Native/truth3_native.sub index beef6c2a..d0fa0d84 100644 --- a/TRUTH3/BNL/Native/truth3_native.sub +++ b/TRUTH3/BNL/Native/truth3_native.sub @@ -4,7 +4,7 @@ Output = /usatlas/u/qlei/batch_output_files/truth3/native/myjob.$(Cluster).$(Pro Error = /usatlas/u/qlei/batch_output_files/truth3/native/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/truth3/native/myjob.$(Cluster).$(Process).log -Executable = /usatlas/u/qlei/dev/af-benchmarking/TRUTH3/BNL/Native/run_truth3_native_batch.sh +Executable = /usatlas/u/qlei/AF-Benchmarking/TRUTH3/BNL/Native/run_truth3_native_batch.sh request_memory = 3G From 0e121529218c580ef98f055f4d05d48267790ef5 Mon Sep 17 00:00:00 2001 From: Qi Bin Lei Date: Thu, 21 May 2026 15:49:15 -0700 Subject: [PATCH 17/18] test: verify PreToolUse check-typos hook fires on commit --- .hook_test | 1 + 1 file changed, 1 insertion(+) create mode 100644 .hook_test diff --git a/.hook_test b/.hook_test new file mode 100644 index 00000000..9daeafb9 --- /dev/null +++ b/.hook_test @@ -0,0 +1 @@ +test From de4ca61c31f27177b6906562456bd31fe8e79601 Mon Sep 17 00:00:00 2001 From: Qi Bin Lei Date: Thu, 21 May 2026 15:49:26 -0700 Subject: [PATCH 18/18] Revert "test: verify PreToolUse check-typos hook fires on commit" This reverts commit 0e121529218c580ef98f055f4d05d48267790ef5. --- .hook_test | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .hook_test diff --git a/.hook_test b/.hook_test deleted file mode 100644 index 9daeafb9..00000000 --- a/.hook_test +++ /dev/null @@ -1 +0,0 @@ -test