diff --git a/EVNT/BNL/CentOS7/evnt_centos.sub b/EVNT/BNL/CentOS7/evnt_centos.sub index c55ef75f..a8537d15 100644 --- a/EVNT/BNL/CentOS7/evnt_centos.sub +++ b/EVNT/BNL/CentOS7/evnt_centos.sub @@ -1,6 +1,5 @@ Universe = vanilla - Output = /usatlas/u/qlei/batch_output_files/evnt/centos/myjob.$(Cluster).$(Process).out Error = /usatlas/u/qlei/batch_output_files/evnt/centos/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/evnt/centos/myjob.$(Cluster).$(Process).log diff --git a/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh b/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh index 5a8418df..7f38aaee 100755 --- a/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh +++ b/EVNT/BNL/CentOS7/run_evnt_centos7_batch.sh @@ -1,28 +1,39 @@ #!/bin/bash -# Current time used for log file storage -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +# shellcheck disable=SC1091 +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh +# Current time used for log file storage +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Sets up the container: ## -c : used to make a container followed by the OS we want to use ## -m : mounts a specific directory ## -r : precedes the commands we want to run within the container +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh -c centos7 -m /atlasgpfs01 -r "asetup AthGeneration,23.6.31,here && export LHAPATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current && export LHAPDF_DATA_PATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current &&\ + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ echo $(date -u "+%Y-%m-%dT%H:%M:%SZ") >> split.log &&\ - Gen_tf.py --ecmEnergy=13000.0 --jobConfig=/atlasgpfs01/usatlas/data/qlei/EVNTJob/100xxx/100001/ --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log &&\ + /usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=/atlasgpfs01/usatlas/data/qlei/EVNTJob/100xxx/100001/ --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log &&\ + cat pipe_file.log >> log.generate &&\ echo $(date -u "+%Y-%m-%dT%H:%M:%SZ") >> split.log" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Output directory -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_centos7_batch/${curr_time}" +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_centos7_batch/${start_time}" # Creates the output directory mkdir -p "${output_dir}" # Obtains and appends the host name and payload size to the log file hostname >> split.log du EVNT.root >> split.log + +append_benchmark log.generate "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" + # Moves the log file to the output directory mv log.generate "${output_dir}" mv split.log "${output_dir}" diff --git a/EVNT/BNL/EL9/evnt_el.sub b/EVNT/BNL/EL9/evnt_el.sub index af007825..580360b2 100644 --- a/EVNT/BNL/EL9/evnt_el.sub +++ b/EVNT/BNL/EL9/evnt_el.sub @@ -1,6 +1,5 @@ Universe = vanilla - Output = /usatlas/u/qlei/batch_output_files/evnt/el/myjob.$(Cluster).$(Process).out Error = /usatlas/u/qlei/batch_output_files/evnt/el/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/evnt/el/myjob.$(Cluster).$(Process).log diff --git a/EVNT/BNL/EL9/run_evnt_el9_batch.sh b/EVNT/BNL/EL9/run_evnt_el9_batch.sh index 8f7c5ffd..d099dc65 100755 --- a/EVNT/BNL/EL9/run_evnt_el9_batch.sh +++ b/EVNT/BNL/EL9/run_evnt_el9_batch.sh @@ -1,7 +1,10 @@ #!/bin/bash -# Current time used for log file storage -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +# shellcheck disable=SC1091 +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh + +# current time used for log file storage +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # The OS used in the container OScontainer="el9" @@ -10,21 +13,30 @@ OScontainer="el9" ## -c : used to make a container followed by the OS we want to use ## -m : mounts a specific directory ## -r : precedes the commands we want to run within the container +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh -c ${OScontainer} -m /atlasgpfs01 -r "asetup AthGeneration,23.6.34,here &&\ + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ echo $(date -u "+%Y-%m-%dT%H:%M:%SZ") >> split.log &&\ - Gen_tf.py --ecmEnergy=13000.0 --jobConfig=/atlasgpfs01/usatlas/data/qlei/EVNTJob/100xxx/100001/ --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log &&\ + /usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=/atlasgpfs01/usatlas/data/qlei/EVNTJob/100xxx/100001/ --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log &&\ + cat pipe_file.log >> log.generate &&\ echo $(date -u "+%Y-%m-%dT%H:%M:%SZ") >> split.log" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Output directory -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_el9_batch/${curr_time}" +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_el9_batch/${start_time}" # Creates the output directory mkdir -p "${output_dir}" # Obtains and appends the host name and payload size to the log file hostname >> split.log du EVNT.root >> split.log + +append_benchmark log.generate "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" + # Moves the log file to the output directory mv log.generate "${output_dir}" mv split.log "${output_dir}" diff --git a/EVNT/BNL/Native/evnt_native.sub b/EVNT/BNL/Native/evnt_native.sub index 2a2a3251..63de0439 100644 --- a/EVNT/BNL/Native/evnt_native.sub +++ b/EVNT/BNL/Native/evnt_native.sub @@ -1,6 +1,5 @@ Universe = vanilla - Output = /usatlas/u/qlei/batch_output_files/evnt/native/myjob.$(Cluster).$(Process).out Error = /usatlas/u/qlei/batch_output_files/evnt/native/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/evnt/native/myjob.$(Cluster).$(Process).log diff --git a/EVNT/BNL/Native/run_evnt_native_batch.sh b/EVNT/BNL/Native/run_evnt_native_batch.sh index 02ed1d54..68d5d1d8 100755 --- a/EVNT/BNL/Native/run_evnt_native_batch.sh +++ b/EVNT/BNL/Native/run_evnt_native_batch.sh @@ -1,8 +1,15 @@ #!/bin/bash +# shellcheck disable=SC1091 +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh + +# current time used for log file storage +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # The seed used in the job seed=1001 +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Sets up our working environment export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -12,25 +19,30 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh # Sets up the Ath* version asetup AthGeneration,23.6.34,here +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Appends time before Gen_tf.py to log file date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log -Gen_tf.py --ecmEnergy=13000.0 --jobConfig=/atlasgpfs01/usatlas/data/qlei/EVNTJob/100xxx/100001/ --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=${seed} 2>&1 | tee pipe_file.log +/usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=/atlasgpfs01/usatlas/data/qlei/EVNTJob/100xxx/100001/ --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=${seed} 2>&1 | tee pipe_file.log +cat pipe_file.log >> log.generate # Appends time after Gen_tf.py to log file date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log -# Current time used for log file storage -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Output directory -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_native_batch/${curr_time}" +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/EVNT_native_batch/${start_time}" # Creates the output directory mkdir -p "${output_dir}" # Obtains and appends the host name and payload size to the log file hostname >> split.log du EVNT.root >> split.log + +append_benchmark log.generate "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" + # Moves the log file to the output directory mv log.generate "${output_dir}" mv split.log "${output_dir}" diff --git a/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh b/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh index 4b1b3138..1041d1e3 100755 --- a/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh +++ b/EVNT/UC/CentOS7/run_evnt_centos7_batch.sh @@ -1,5 +1,7 @@ #!/bin/bash +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" + # shellcheck disable=SC2034 OS_container="centos7" @@ -7,10 +9,13 @@ OS_container="centos7" # shellcheck disable=SC2034 seed=1001 +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Directory storing the input files config_dir="${GITHUB_WORKSPACE}/EVNT/EVNTFiles/100xxx/100001" # Creates the ATLAS Environment +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # Appends time before Gen_tf.py to log file @@ -22,7 +27,15 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 -source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup AthGeneration,23.6.31,here && export LHAPATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current && export LHAPDF_DATA_PATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current && Gen_tf.py --ecmEnergy=13000.0 --jobConfig=${config_dir} --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log" +source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup AthGeneration,23.6.31,here &&\ + export LHAPATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current &&\ + export LHAPDF_DATA_PATH=/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current:/cvmfs/atlas.cern.ch/repo/sw/software/23.6/sw/lcg/releases/LCG_104d_ATLAS_13/MCGenerators/lhapdf/6.5.3/x86_64-centos7-gcc11-opt/share/LHAPDF:/cvmfs/atlas.cern.ch/repo/sw/Generators/lhapdfsets/current &&\ + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ + /usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=${config_dir} --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=1001 2>&1 | tee pipe_file.log &&\ + cat pipe_file.log >> log.generate" + +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Appends time after Gen_tf.py to a log file { @@ -30,3 +43,5 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup hostname du EVNT.root } >> split.log + +append_benchmark "log.generate" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/EVNT/UC/EL9/run_evnt_el9_batch.sh b/EVNT/UC/EL9/run_evnt_el9_batch.sh index b45efe93..2cdfb184 100755 --- a/EVNT/UC/EL9/run_evnt_el9_batch.sh +++ b/EVNT/UC/EL9/run_evnt_el9_batch.sh @@ -1,15 +1,20 @@ #!/bin/bash +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" + # Defining the OS wanted in the container OS_container="el9" # The seed used in the job seed=1001 +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Directory storing the input files config_dir="${GITHUB_WORKSPACE}/EVNT/EVNTFiles/100xxx/100001" # Setting up the working environment +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # Appends time before Gen_tf.py to log file @@ -21,11 +26,18 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c "${OS_container}" -r "asetup AthGeneration,23.6.34,here && \ -Gen_tf.py --ecmEnergy=13000.0 --jobConfig=${config_dir} --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=${seed} 2>&1 | tee pipe_file.log" +echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log && \ +/usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig=${config_dir} --outputEVNTFile=EVNT.root --maxEvents=1000 --randomSeed=${seed} 2>&1 | tee pipe_file.log && \ +cat pipe_file.log >> log.generate" + +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Appends time after Gen_tf.py to a log file { date -u "+%Y-%m-%dT%H:%M:%SZ" hostname du EVNT.root -} >> split.logi +} >> split.log + +append_benchmark "log.generate" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/EVNT/UC/Native/run_evnt_native_batch.sh b/EVNT/UC/Native/run_evnt_native_batch.sh index 1637a223..2a38bc77 100755 --- a/EVNT/UC/Native/run_evnt_native_batch.sh +++ b/EVNT/UC/Native/run_evnt_native_batch.sh @@ -1,13 +1,19 @@ #!/bin/bash +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" + # The seed used in the job seed=1001 +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Directory storing the input files config_dir="${GITHUB_WORKSPACE}/EVNT/EVNTFiles/100xxx/100001" max_events=1000 +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Sets up our working environment echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -21,10 +27,15 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log # Sets up the Ath* version asetup AthGeneration,23.6.34,here +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + echo "::group::EVNT Generation" -Gen_tf.py --ecmEnergy=13000.0 --jobConfig="${config_dir}" --outputEVNTFile=EVNT.root --maxEvents="${max_events}" --randomSeed="${seed}" 2>&1 | tee pipe_file.log +/usr/bin/time -v Gen_tf.py --ecmEnergy=13000.0 --jobConfig="${config_dir}" --outputEVNTFile=EVNT.root --maxEvents="${max_events}" --randomSeed="${seed}" 2>&1 | tee pipe_file.log +cat pipe_file.log >> log.generate echo "::endgroup::" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Appends time after Gen_tf.py to a log file echo "::group::Collect Metrics" { @@ -33,3 +44,5 @@ echo "::group::Collect Metrics" du EVNT.root } >> split.log echo "::endgroup::" + +append_benchmark "log.generate" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/NTuple_Hist/coffea/BNL/coffea_el9.sub b/NTuple_Hist/coffea/BNL/coffea_el9.sub index 70b584da..d030860d 100755 --- a/NTuple_Hist/coffea/BNL/coffea_el9.sub +++ b/NTuple_Hist/coffea/BNL/coffea_el9.sub @@ -1,6 +1,5 @@ Universe = vanilla - Output = /usatlas/u/qlei/batch_output_files/coffea/myjob.$(Cluster).$(Process).out Error = /usatlas/u/qlei/batch_output_files/coffea/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/coffea/myjob.$(Cluster).$(Process).log diff --git a/NTuple_Hist/coffea/BNL/run_example.sh b/NTuple_Hist/coffea/BNL/run_example.sh index fd8fd369..66e7cdeb 100755 --- a/NTuple_Hist/coffea/BNL/run_example.sh +++ b/NTuple_Hist/coffea/BNL/run_example.sh @@ -1,7 +1,11 @@ #!/bin/bash -# Gets the current time -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +# shellcheck disable=SC1091 +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh + +# current time used for log file storage + +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") working_dir="/atlasgpfs01/usatlas/data/qlei/ntuple/coffea" @@ -15,6 +19,7 @@ fi cp ~/AF-Benchmarking/NTuple_Hist/coffea/BNL/example.py . +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig # shellcheck disable=SC1091 @@ -22,7 +27,11 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -m /atlasgpfs01 python3 -m venv venv &&\ ./venv/bin/python -m pip install -U pip &&\ ./venv/bin/python -m pip install atlas_schema 'dask_awkward!=2026.2.0' &&\ - ./venv/bin/python example.py 2>&1 | tee coffea_hist.log" + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ + /usr/bin/time -v ./venv/bin/python example.py 2>&1 | tee coffea_hist.log" + +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") { date -u "+%Y-%m-%dT%H:%M:%SZ" @@ -30,9 +39,11 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -m /atlasgpfs01 du coffea.root } >> split.log -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/Coffea_Hist/${curr_time}" +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/Coffea_Hist/${start_time}" mkdir -p "${output_dir}" +append_benchmark coffea_hist.log "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" + mv coffea_hist.log "${output_dir}" mv split.log "${output_dir}" diff --git a/NTuple_Hist/coffea/UC/run_example.sh b/NTuple_Hist/coffea/UC/run_example.sh index 91640ed3..74720f3b 100755 --- a/NTuple_Hist/coffea/UC/run_example.sh +++ b/NTuple_Hist/coffea/UC/run_example.sh @@ -1,10 +1,15 @@ #!/bin/bash +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" + date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + #cp ${GITHUB_WORKSPACE}/NTuple_Hist/coffea/UC/example.py . # Setting up environment and container +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig # shellcheck disable=SC1091 @@ -12,7 +17,11 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -m /data -r "ls python3 -m venv venv &&\ ./venv/bin/python -m pip install -U pip &&\ ./venv/bin/python -m pip install atlas_schema 'dask_awkward!=2026.2.0' &&\ - ./venv/bin/python ${GITHUB_WORKSPACE}/NTuple_Hist/coffea/UC/example.py 2>&1 | tee coffea_hist.log" + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ + /usr/bin/time -v ./venv/bin/python ${GITHUB_WORKSPACE}/NTuple_Hist/coffea/UC/example.py 2>&1 | tee coffea_hist.log" + +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") echo "::group::Collect Metrics" { @@ -20,3 +29,5 @@ echo "::group::Collect Metrics" hostname } >> split.log echo "::endgroup::" + +append_benchmark "coffea_hist.log" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/NTuple_Hist/compare_outputs.py b/NTuple_Hist/compare_outputs.py new file mode 100644 index 00000000..54e91dc4 --- /dev/null +++ b/NTuple_Hist/compare_outputs.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python3 +"""Compare photon pT histogram outputs across NTuple-to-histogram frameworks. + +Requires: ROOT (PyROOT) — available on ATLAS analysis facilities. + +Usage: + python compare_outputs.py \\ + --coffea coffea.root \\ + --eventloop event_loop_noarrays_output_hist.root \\ + --fastframes /srv/output/histograms.root + +FastFrames histogram name: FastFrames uses the convention +{sample}_{region}_{variable}_{systematic}, so for the default BNL config +the histogram is "example_FS_Muon_ph_pt_NOSYS". Override with +--fastframes-hist if your config differs. + +Key difference to look for: coffea and eventloop apply an event-level +tightID cut (skip events with no tightID photon), while FastFrames +fills the underflow with events where the sorted tightID list is empty +(ph1_pt1_NOSYS = -0.999 GeV). The in-range integrals should be close +but FastFrames will have extra entries in the underflow. +""" + +import argparse + +import ROOT + +ROOT.gROOT.SetBatch(True) +ROOT.gStyle.SetOptStat(0) +ROOT.gStyle.SetOptTitle(0) + + +def load_th1(path, name): + """Load a TH1 from a ROOT file, detached from the file.""" + f = ROOT.TFile.Open(path, "READ") + if not f or f.IsZombie(): + raise OSError(f"Cannot open {path}") + h = f.Get(name) + if not h: + keys = [k.GetName() for k in f.GetListOfKeys()] + raise KeyError(f"{name!r} not found in {path}.\nAvailable keys: {keys}") + h = h.Clone() + h.SetDirectory(0) + f.Close() + return h + + +def check_binning(hists): + ref_label, ref_h = hists[0] + ok = True + for label, h in hists[1:]: + if ( + h.GetNbinsX() != ref_h.GetNbinsX() + or h.GetXaxis().GetXmin() != ref_h.GetXaxis().GetXmin() + or h.GetXaxis().GetXmax() != ref_h.GetXaxis().GetXmax() + ): + print(f"WARNING: binning of {label!r} differs from {ref_label!r}") + ok = False + return ok + + +def print_summary(hists): + print( + f"\n{'Framework':<12} {'Integral':>14} {'Peak bin':>12} {'Non-zero bins':>16}" + ) + print("-" * 58) + for label, h in hists: + integral = h.Integral() + peak = h.GetMaximum() + nonzero = sum(1 for i in range(1, h.GetNbinsX() + 1) if h.GetBinContent(i) > 0) + print(f"{label:<12} {integral:>14.4f} {peak:>12.4f} {nonzero:>16d}") + + +def make_ratio(h_num, h_den, name): + ratio = h_num.Clone(name) + ratio.SetDirectory(0) + ratio.Divide(h_den) + return ratio + + +def main(): + parser = argparse.ArgumentParser( + description="Compare photon pT histograms across NTuple-to-histogram frameworks", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument("--coffea", required=True, help="coffea output ROOT file") + parser.add_argument( + "--coffea-hist", + default="all", + metavar="NAME", + help="histogram name in coffea ROOT file (default: all)", + ) + parser.add_argument("--eventloop", required=True, help="eventloop output ROOT file") + parser.add_argument( + "--eventloop-hist", + default="baseline_pt_total", + metavar="NAME", + help="histogram name in eventloop ROOT file (default: baseline_pt_total)", + ) + parser.add_argument( + "--fastframes", required=True, help="fastframes output ROOT file" + ) + parser.add_argument( + "--fastframes-hist", + default="example_FS_Muon_ph_pt_NOSYS", + metavar="NAME", + help="histogram name in fastframes ROOT file (default: example_FS_Muon_ph_pt_NOSYS)", + ) + parser.add_argument( + "--plot", + default="comparison.pdf", + metavar="PATH", + help="output plot file (default: comparison.pdf; pass empty string to skip)", + ) + args = parser.parse_args() + + coffea_h = load_th1(args.coffea, args.coffea_hist) + el_h = load_th1(args.eventloop, args.eventloop_hist) + ff_h = load_th1(args.fastframes, args.fastframes_hist) + + hists = [("coffea", coffea_h), ("eventloop", el_h), ("fastframes", ff_h)] + + binning_ok = check_binning(hists) + print_summary(hists) + + if binning_ok: + ratio_el = make_ratio(el_h, coffea_h, "ratio_el") + ratio_ff = make_ratio(ff_h, coffea_h, "ratio_ff") + + mean_el = sum( + ratio_el.GetBinContent(i) + for i in range(1, ratio_el.GetNbinsX() + 1) + if coffea_h.GetBinContent(i) > 0 + ) / max( + 1, + sum( + 1 + for i in range(1, coffea_h.GetNbinsX() + 1) + if coffea_h.GetBinContent(i) > 0 + ), + ) + mean_ff = sum( + ratio_ff.GetBinContent(i) + for i in range(1, ratio_ff.GetNbinsX() + 1) + if coffea_h.GetBinContent(i) > 0 + ) / max( + 1, + sum( + 1 + for i in range(1, coffea_h.GetNbinsX() + 1) + if coffea_h.GetBinContent(i) > 0 + ), + ) + print("\nMean bin ratio (where coffea > 0):") + print(f" EventLoop / Coffea : {mean_el:.4f}") + print(f" FastFrames / Coffea : {mean_ff:.4f}") + + if not args.plot: + return + + canvas = ROOT.TCanvas("comparison", "Framework Comparison", 800, 800) + pad_top = ROOT.TPad("pad_top", "", 0, 0.3, 1, 1) + pad_bot = ROOT.TPad("pad_bot", "", 0, 0, 1, 0.3) + pad_top.SetBottomMargin(0.03) + pad_top.SetTopMargin(0.08) + pad_bot.SetTopMargin(0.03) + pad_bot.SetBottomMargin(0.32) + pad_top.Draw() + pad_bot.Draw() + + # --- top pad --- + pad_top.cd() + pad_top.SetLogy() + + coffea_h.SetLineColor(ROOT.kBlue) + coffea_h.SetLineWidth(2) + coffea_h.GetYaxis().SetTitle("Events / bin") + coffea_h.GetYaxis().SetTitleSize(0.05) + coffea_h.GetYaxis().SetLabelSize(0.04) + coffea_h.GetXaxis().SetLabelSize(0) + coffea_h.Draw("HIST") + + el_h.SetLineColor(ROOT.kGreen + 2) + el_h.SetLineWidth(2) + el_h.SetLineStyle(2) + el_h.Draw("HIST SAME") + + ff_h.SetLineColor(ROOT.kRed) + ff_h.SetLineWidth(2) + ff_h.SetLineStyle(3) + ff_h.Draw("HIST SAME") + + legend = ROOT.TLegend(0.58, 0.68, 0.88, 0.88) + legend.SetBorderSize(0) + legend.AddEntry(coffea_h, "Coffea", "l") + legend.AddEntry(el_h, "EventLoop", "l") + legend.AddEntry(ff_h, "FastFrames", "l") + legend.Draw() + + title_latex = ROOT.TLatex() + title_latex.SetNDC() + title_latex.SetTextSize(0.05) + title_latex.DrawLatex(0.12, 0.93, "Photon p_{T}: Coffea vs EventLoop vs FastFrames") + + # --- bottom pad --- + pad_bot.cd() + + if binning_ok: + ratio_el.SetLineColor(ROOT.kGreen + 2) + ratio_el.SetLineWidth(2) + ratio_el.SetLineStyle(2) + ratio_el.GetYaxis().SetTitle("Ratio to Coffea") + ratio_el.GetYaxis().SetRangeUser(0.5, 1.5) + ratio_el.GetYaxis().SetNdivisions(505) + ratio_el.GetYaxis().SetTitleSize(0.11) + ratio_el.GetYaxis().SetTitleOffset(0.45) + ratio_el.GetYaxis().SetLabelSize(0.09) + ratio_el.GetXaxis().SetTitle("Photon p_{T} [GeV]") + ratio_el.GetXaxis().SetTitleSize(0.12) + ratio_el.GetXaxis().SetLabelSize(0.10) + ratio_el.Draw("HIST") + + ratio_ff.SetLineColor(ROOT.kRed) + ratio_ff.SetLineWidth(2) + ratio_ff.SetLineStyle(3) + ratio_ff.Draw("HIST SAME") + + xmin = coffea_h.GetXaxis().GetXmin() + xmax = coffea_h.GetXaxis().GetXmax() + unity = ROOT.TLine(xmin, 1.0, xmax, 1.0) + unity.SetLineColor(ROOT.kBlack) + unity.SetLineWidth(1) + unity.Draw() + + bot_legend = ROOT.TLegend(0.58, 0.78, 0.88, 0.95) + bot_legend.SetBorderSize(0) + bot_legend.SetTextSize(0.09) + bot_legend.AddEntry(ratio_el, "EventLoop / Coffea", "l") + bot_legend.AddEntry(ratio_ff, "FastFrames / Coffea", "l") + bot_legend.Draw() + else: + msg = ROOT.TLatex() + msg.SetNDC() + msg.SetTextSize(0.08) + msg.DrawLatex(0.15, 0.5, "Binning mismatch #font[52]{ratio unavailable}") + + canvas.SaveAs(args.plot) + print(f"\nPlot saved to: {args.plot}") + + +if __name__ == "__main__": + main() diff --git a/NTuple_Hist/event_loop/BNL/columnar/event_loop_arrays.py b/NTuple_Hist/event_loop/BNL/columnar/event_loop_arrays.py deleted file mode 100644 index 43975d04..00000000 --- a/NTuple_Hist/event_loop/BNL/columnar/event_loop_arrays.py +++ /dev/null @@ -1,106 +0,0 @@ -import ROOT as r -from pathlib import Path -from array import array - - -ph_pt_array = r.std.vector("float")() -ph_select_tightID_array = r.std.vector("char")() -weight_mc_array = array("f", [0.0]) -weight_pileup_array = array("f", [0.0]) - - -def photon_eventloop(f_name, h_baseline_pt, metadata): - """Process one ROOT file and fill the given histogram.""" - fp = r.TFile.Open(f_name, "READ") - if not fp or fp.IsZombie(): - print(f"Could not open file: {f_name}") - return - - tree = fp.Get("analysis") - if not tree: - print(f"No tree named 'analysis' in {f_name}") - fp.Close() - return - - tree.SetBranchAddress("ph_pt_NOSYS", ph_pt_array) - tree.SetBranchAddress("ph_select_tightID_NOSYS", ph_select_tightID_array) - tree.SetBranchAddress("weight_mc_NOSYS", weight_mc_array) - tree.SetBranchAddress("weight_pileup_NOSYS", weight_pileup_array) - - totalevents = tree.GetEntriesFast() - print(f" {f_name} has {totalevents} events") - - # Normalization factors - xs = metadata["xs"] - lum = metadata["luminosity"] - genFiltEff = metadata["genFiltEff"] - kfactor = metadata.get("kfactor", 1.0) - sumOfWeights = metadata["sum_of_weights"] - weight_norm = xs * genFiltEff * kfactor * lum / sumOfWeights - - # name of branches in tree - # print(tree.GetListOfBranches()) - - # Event loop - numevents = tree.GetEntriesFast() - for i in range(numevents): - if (i + 1) % 50000 == 0: - print(f" Processed {i + 1:6d}/{totalevents}") - - tree.GetEntry(i) - - weight = weight_norm * weight_mc_array[0] * weight_pileup_array[0] - - # print("--------------------------------------------") - for index in range(len(ph_pt_array)): - if not (ord(ph_select_tightID_array[index]) > 0): - continue - - h_baseline_pt.Fill(ph_pt_array[index] / 1000.0, weight) # Fill GeV - break # only fill with first passing photon - - fp.Close() - - -def main(): - # Define samples (all will contribute to one histogram) - samples = [ - { - "name": "Wmunugamma", - "path": Path( - "/data/maclwong/Ben_Bkg_Samples/v2/user.bhodkins.700402.Wmunugamma.mc20e.v2.0_ANALYSIS.root/" - ), - "metadata": { - "genFiltEff": 1.0, - "luminosity": 58.7916, - "xs": 364840.0, - "sum_of_weights": 1816229744476160.0, - "kfactor": 1.0, - }, - } - ] - - # Create a single histogram for all samples combined - h_baseline_pt = r.TH1D( - "baseline_pt_total", - "Photon baseline pT (all samples); pT [GeV]; Events", - 100, - 0, - 1000, - ) - - # Process every file in every sample, filling the same histogram - for s in samples: - print(f"\nProcessing sample: {s['name']}") - for f in sorted(s["path"].glob("*.root")): - photon_eventloop(str(f), h_baseline_pt, s["metadata"]) - - # Save combined histogram - output_file = r.TFile("event_loop_arrays_output_hist.root", "RECREATE") - h_baseline_pt.Write() - output_file.Close() - print("\nCombined histogram written to event_loop_output_hist.root") - - -if __name__ == "__main__": - main() diff --git a/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh b/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh index b2bc6527..7cb964bc 100755 --- a/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh +++ b/NTuple_Hist/event_loop/BNL/columnar/run_eventloop_arrays.sh @@ -1,15 +1,23 @@ #!/bin/bash -# Time that will be used to store the log file -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +# shellcheck disable=SC1091 +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh + +# current time used for log file storage +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh asetup StatAnalysis,0.6.3 + +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log -python3 ~/AF-Benchmarking/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.py 2>&1 | tee eventloop_arrays.log +/usr/bin/time -v python3 ~/AF-Benchmarking/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.py 2>&1 | tee eventloop_arrays.log { date -u "+%Y-%m-%dT%H:%M:%SZ" @@ -17,9 +25,23 @@ python3 ~/AF-Benchmarking/NTuple_Hist/event_loop/BNL/columnar/eventloop_arrays.p du event_loop_output_hist.root } >> split.log -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_arrays/${curr_time}" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_arrays/${start_time}" mkdir -p "${output_dir}" +echo "Start Time: ${start_time}" +echo "End Time: ${end_time}" + +# Verify the log exists before appending +if [ -f eventloop_arrays.log ]; then + append_benchmark eventloop_arrays.log "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" +else + echo "ERROR: eventloop_arrays.log not found in $(pwd)" +fi + +# append_benchmark eventloop_arrays.log "${start_time}" "${end_time}" + mv eventloop_arrays.log "${output_dir}" mv split.log "${output_dir}" diff --git a/NTuple_Hist/event_loop/BNL/standard/event_loop.sub b/NTuple_Hist/event_loop/BNL/standard/event_loop.sub deleted file mode 100644 index 5753218c..00000000 --- a/NTuple_Hist/event_loop/BNL/standard/event_loop.sub +++ /dev/null @@ -1,12 +0,0 @@ -Universe = vanilla - -Output = /usatlas/u/jroblesgo/batch_output_files/eventloop/myjob.$(Cluster).$(Process).out -Error = /usatlas/u/jroblesgo/batch_output_files/eventloop/myjob.$(Cluster).$(Process).err -Log = /usatlas/u/jroblesgo/batch_output_files/eventloop/myjob.$(Cluster).$(Process).log - -Executable = /home/selbor/AF-Benchmarking/NTuple_Hist/event_loop/UC/run_photon_eventloop.sh - -request_memory = 3GB -request_cpus = 1 - -Queue 1 diff --git a/NTuple_Hist/event_loop/BNL/standard/event_loop_noarrays.py b/NTuple_Hist/event_loop/BNL/standard/event_loop_noarrays.py deleted file mode 100644 index bff6e3b4..00000000 --- a/NTuple_Hist/event_loop/BNL/standard/event_loop_noarrays.py +++ /dev/null @@ -1,91 +0,0 @@ -import ROOT as r -from pathlib import Path - - -def photon_eventloop(f_name, h_baseline_pt, metadata): - """Process one ROOT file and fill the given histogram.""" - fp = r.TFile.Open(f_name, "READ") - if not fp or fp.IsZombie(): - print(f"Could not open file: {f_name}") - return - - tree = fp.Get("analysis") - if not tree: - print(f"No tree named 'analysis' in {f_name}") - fp.Close() - return - - totalevents = tree.GetEntriesFast() - print(f" {f_name} has {totalevents} events") - - # Normalization factors - xs = metadata["xs"] - lum = metadata["luminosity"] - genFiltEff = metadata["genFiltEff"] - kfactor = metadata.get("kfactor", 1.0) - sumOfWeights = metadata["sum_of_weights"] - weight_norm = xs * genFiltEff * kfactor * lum / sumOfWeights - - # name of branches in tree - # print(tree.GetListOfBranches()) - - # Event loop - for i, event in enumerate(tree): - if (i + 1) % 50000 == 0: - print(f" Processed {i + 1:6d}/{totalevents}") - - weight = weight_norm * event.weight_mc_NOSYS * event.weight_pileup_NOSYS - - for index, ph_pt in enumerate(event.ph_pt_NOSYS): - ph_tight = ord(event.ph_select_tightID_NOSYS[index]) > 0 - if not ph_tight: - continue - - h_baseline_pt.Fill(ph_pt / 1000.0, weight) # Fill GeV - break # only fill with first passing photon - - fp.Close() - - -def main(): - # Define samples (all will contribute to one histogram) - samples = [ - { - "name": "Wmunugamma", - "path": Path( - "/data/maclwong/Ben_Bkg_Samples/v2/user.bhodkins.700402.Wmunugamma.mc20e.v2.0_ANALYSIS.root/" - ), - "metadata": { - "genFiltEff": 1.0, - "luminosity": 58.7916, - "xs": 364840.0, - "sum_of_weights": 1816229744476160.0, - "kfactor": 1.0, - }, - } - ] - - # Create a single histogram for all samples combined - h_baseline_pt = r.TH1D( - "baseline_pt_total", - "Photon baseline pT (all samples); pT [GeV]; Events", - 100, - 0, - 1000, - ) - - # Process every file in every sample, filling the same histogram - for s in samples: - print(f"\nProcessing sample: {s['name']}") - for f in sorted(s["path"].glob("*.root")): - photon_eventloop(str(f), h_baseline_pt, s["metadata"]) - - # Save combined histogram - output_file = r.TFile("event_loop_noarrays_output_hist.root", "RECREATE") - h_baseline_pt.Write() - output_file.Close() - print("\nCombined histogram written to event_loop_output_hist.root") - - -if __name__ == "__main__": - main() diff --git a/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh b/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh index 7173ab92..8ff6f74e 100755 --- a/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh +++ b/NTuple_Hist/event_loop/BNL/standard/run_eventloop_noarrays.sh @@ -1,7 +1,12 @@ #!/bin/bash -# Time that will be used to store the log file -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +# shellcheck disable=SC1091 +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh + +# current time used for log file storage +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig @@ -9,8 +14,11 @@ export ALRB_localConfigDir="$HOME"/localConfig source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh asetup StatAnalysis,0.6.3 + +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log &&\ -python3 ~/AF-Benchmarking/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.py 2>&1 | tee eventloop_noarrays.log +/usr/bin/time -v python3 ~/AF-Benchmarking/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays.py 2>&1 | tee eventloop_noarrays.log { date -u "+%Y-%m-%dT%H:%M:%SZ" @@ -18,9 +26,13 @@ python3 ~/AF-Benchmarking/NTuple_Hist/event_loop/BNL/standard/eventloop_noarrays du event_loop_noarrays_output_hist.root } >> split.log -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_noarrays/${curr_time}" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/eventloop_noarrays/${start_time}" mkdir -p "${output_dir}" +append_benchmark eventloop_noarrays.log "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" + mv eventloop_noarrays.log "${output_dir}" mv split.log "${output_dir}" diff --git a/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh b/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh index 5d3dba57..b0fb3f44 100755 --- a/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh +++ b/NTuple_Hist/event_loop/UC/columnar/run_eventloop_arrays.sh @@ -1,5 +1,10 @@ #!/bin/bash +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" + +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -8,16 +13,22 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh echo "::endgroup::" lsetup "views LCG_107a_ATLAS_2 x86_64-el9-gcc13-opt" +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Getting start date date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log # Running the script echo "::group::EventLoop Execution" -python3 "${GITHUB_WORKSPACE}"/NTuple_Hist/event_loop/UC/columnar/event_loop_arrays.py 2>&1 | tee eventloop_arrays.log +/usr/bin/time -v python3 "${GITHUB_WORKSPACE}"/NTuple_Hist/event_loop/UC/columnar/event_loop_arrays.py 2>&1 | tee eventloop_arrays.log echo "::endgroup::" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Collect metrics echo "::group::Collect Metrics" date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log hostname >> split.log echo "::endgroup::" + +append_benchmark "eventloop_arrays.log" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh b/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh index b1de4803..a491cb7c 100755 --- a/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh +++ b/NTuple_Hist/event_loop/UC/standard/run_eventloop_noarrays.sh @@ -1,5 +1,11 @@ #!/bin/bash +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" + +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 @@ -7,17 +13,22 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh echo "::endgroup::" lsetup "views LCG_107a_ATLAS_2 x86_64-el9-gcc13-opt" +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Getting start date date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log # Running the script echo "::group::EventLoop Execution" -python3 "${GITHUB_WORKSPACE}"/NTuple_Hist/event_loop/UC/standard/event_loop_noarrays.py 2>&1 | tee eventloop_noarrays.log +/usr/bin/time -v python3 "${GITHUB_WORKSPACE}"/NTuple_Hist/event_loop/UC/standard/event_loop_noarrays.py 2>&1 | tee eventloop_noarrays.log echo "::endgroup::" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Collect metrics echo "::group::Collect Metrics" date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log hostname >> split.log echo "::endgroup::" + +append_benchmark "eventloop_noarrays.log" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/NTuple_Hist/fastframes/BNL/fastframes_el9.sub b/NTuple_Hist/fastframes/BNL/fastframes_el9.sub index 78b03263..8fadba3a 100644 --- a/NTuple_Hist/fastframes/BNL/fastframes_el9.sub +++ b/NTuple_Hist/fastframes/BNL/fastframes_el9.sub @@ -1,7 +1,6 @@ # shellcheck shell=bash disable=all Universe = vanilla - Output = /usatlas/u/qlei/batch_output_files/fastframes/myjob.$(Cluster).$(Process).out Error = /usatlas/u/qlei/batch_output_files/fastframes/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/fastframes/myjob.$(Cluster).$(Process).log diff --git a/NTuple_Hist/fastframes/BNL/run_fastframes.sh b/NTuple_Hist/fastframes/BNL/run_fastframes.sh index 55d1965e..69a6be30 100755 --- a/NTuple_Hist/fastframes/BNL/run_fastframes.sh +++ b/NTuple_Hist/fastframes/BNL/run_fastframes.sh @@ -1,11 +1,17 @@ #!/bin/bash -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +# shellcheck disable=SC1091 +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh + +# current time used for log file storage + +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") cd /atlasgpfs01/usatlas/data/qlei/ || exit # Sets up ATLAS environment echo "::group::setupATLAS" +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig # shellcheck disable=SC1091 @@ -16,7 +22,8 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -m /atlasgpfs01/usatlas/data/ -c el9 -r "asetup StatAnalysis,0.6.2 &&\ source /atlasgpfs01/usatlas/data/qlei/FastFramesTutorial/TutorialClass/build/setup.sh &&\ - python3 /atlasgpfs01/usatlas/data/qlei/FastFramesTutorial/FastFrames/python/FastFrames.py -c /atlasgpfs01/usatlas/data/qlei/input/mc20e_example_config.yml 2>&1 | tee fastframes.log" + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ + /usr/bin/time -v python3 /atlasgpfs01/usatlas/data/qlei/FastFramesTutorial/FastFrames/python/FastFrames.py -c /atlasgpfs01/usatlas/data/qlei/input/mc20e_example_config.yml 2>&1 | tee fastframes.log" # Getting the date and time after running script echo "::group::Collect Metrics" @@ -26,11 +33,16 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log hostname >> split.log echo "::endgroup::" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # output directory -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/FastFrames_NTuple/${curr_time}" +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/FastFrames_NTuple/${start_time}" # Creates output dir mkdir -p "${output_dir}" +append_benchmark fastframes.log "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" + # Moves log to outputdir mv fastframes.log "${output_dir}" diff --git a/NTuple_Hist/fastframes/UC/run_fastframes.sh b/NTuple_Hist/fastframes/UC/run_fastframes.sh index bdf96592..80afa566 100755 --- a/NTuple_Hist/fastframes/UC/run_fastframes.sh +++ b/NTuple_Hist/fastframes/UC/run_fastframes.sh @@ -1,7 +1,13 @@ #!/bin/bash +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" + yml_dir="${GITHUB_WORKSPACE}/NTuple_Hist/fastframes/UC/" +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Sets up our working environment echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -16,13 +22,17 @@ printf "%s" "${VOMS_PASSWORD}" | voms-proxy-init -voms atlas # shellcheck disable=SC1091 source /data/selbor/FastFramesTutorial/TutorialClass/build/setup.sh +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log echo "::group::FastFrames" -python3 /data/selbor/FastFramesTutorial/FastFrames/python/FastFrames.py -c "${yml_dir}"mc20e_example_config.yml 2>&1 | tee fastframes.log +/usr/bin/time -v python3 /data/selbor/FastFramesTutorial/FastFrames/python/FastFrames.py -c "${yml_dir}"mc20e_example_config.yml 2>&1 | tee fastframes.log printf "\n" echo "::endgroup::" +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Getting the date and time after running script echo "::group::Collect Metrics" date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log @@ -37,3 +47,5 @@ cleanup_dir="/home/selbor/ntuple/fastframes" if [[ -d "${cleanup_dir}" && "${cleanup_dir}" == "/home/selbor/ntuple/fastframes" ]]; then rm -rf "${cleanup_dir:?}/"* fi + +append_benchmark "fastframes.log" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/Rucio/rucio_script.sh b/Rucio/rucio_script.sh index 630b45ce..f7bb31e2 100755 --- a/Rucio/rucio_script.sh +++ b/Rucio/rucio_script.sh @@ -11,6 +11,7 @@ container_el9 (){ # - dir_mount (2) # - output_dir (3) # - download_ID (4) + start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") cd "${1}" || exit export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig @@ -25,6 +26,8 @@ container_el9 (){ hostname >> rucio.log &&\ du \"${4#*:}\"/ >> rucio.log &&\ mv rucio.log \"${3}\"" + end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + append_benchmark "${3}/rucio.log" "${start_time}" "${end_time}" "${start_time}" "${start_time}" "rucio" } native_el9 () { @@ -32,6 +35,7 @@ native_el9 () { # - output_dir # - job_dir # - download_ID + start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase export ALRB_localConfigDir="$HOME"/localConfig @@ -48,10 +52,12 @@ native_el9 () { echo "::group::Rucio Download" rucio download --rses AGLT2_LOCALGROUPDISK "${3}" 2>&1 | tee rucio.log echo "::endgroup::" + end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") echo "::group::Collect Metrics" hostname >> rucio.log du "${3#*:}" >> rucio.log echo "::endgroup::" + append_benchmark "rucio.log" "${start_time}" "${end_time}" "${start_time}" "${start_time}" "rucio" mv rucio.log "${1}" } @@ -82,6 +88,8 @@ case "$site" in job_dir="/usatlas/u/qlei/test/Rucio/" dir_mount="/atlasgpfs01/usatlas/data/" output_dir="/atlasgpfs01/usatlas/data/qlei/logs/Rucio/${curr_time}/" + AF_BENCH_DIR="/usatlas/u/qlei/AF-Benchmarking" + source ${AF_BENCH_DIR}/parsing/utils/benchmark_utils.sh container_el9 "$job_dir" "$dir_mount" "$output_dir" "$download_ID" ;; slac) @@ -92,6 +100,7 @@ case "$site" in ;; uchicago) output_dir="${PWD}" + source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" native_el9 "${PWD}" "${PWD}" "$download_ID" ;; nersc) diff --git a/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh b/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh index 4588f010..cb8308e3 100755 --- a/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh +++ b/TRUTH3/BNL/CentOS7/run_truth3_centos7_batch.sh @@ -1,12 +1,17 @@ #!/bin/bash + +# shellcheck disable=SC1091 +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh + # current time used for log file storage -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Copying input files to working directory cp -r ~/AF-Benchmarking/TRUTH3/EVNT.root . # Sets up the ATLAS Environment +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # Sets up the container: @@ -15,16 +20,24 @@ export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup AthDerivation,21.2.178.0,here && \ - date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log &&\ - Reco_tf.py --inputEVNTFile EVNT.root --outputDAODFile=TRUTH3.root --reductionConf TRUTH3 2>&1 | tee pipe_file.log &&\ - date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log" + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ + date -u \"+%Y-%m-%dT%H:%M:%SZ\" >> split.log &&\ + /usr/bin/time -v Reco_tf.py --inputEVNTFile EVNT.root --outputDAODFile=TRUTH3.root --reductionConf TRUTH3 2>&1 | tee pipe_file.log &&\ + cat pipe_file.log >> log.EVNTtoDAOD &&\ + date -u \"+%Y-%m-%dT%H:%M:%SZ\" >> split.log" -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_centos7_batch/${curr_time}" +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_centos7_batch/${start_time}" mkdir -p "${output_dir}" hostname >> split.log du DAOD_TRUTH3.TRUTH3.root >> split.log + +append_benchmark log.EVNTtoDAOD "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" + # Moves the log file to the output directory mv log.EVNTtoDAOD "${output_dir}" mv split.log "${output_dir}" diff --git a/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh b/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh index 1d1c1fd5..ed3b9585 100755 --- a/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh +++ b/TRUTH3/BNL/EL9/run_truth3_el9_batch.sh @@ -1,12 +1,16 @@ #!/bin/bash -# Current time used for file storage -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +# shellcheck disable=SC1091 +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh + +# Current time used for file storage +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Copying input files to working directory cp -r ~/AF-Benchmarking/TRUTH3/EVNT.root . # Sets up the environment +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # Sets up the container: @@ -15,13 +19,17 @@ export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -r "asetup Athena,24.0.53,here &&\ - date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log &&\ - Derivation_tf.py --CA True --inputEVNTFile EVNT.root --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log &&\ - date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log" + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log &&\ + date -u \"+%Y-%m-%dT%H:%M:%SZ\" >> split.log &&\ + /usr/bin/time -v Derivation_tf.py --CA True --inputEVNTFile EVNT.root --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log &&\ + cat pipe_file.log >> log.Derivation &&\ + date -u \"+%Y-%m-%dT%H:%M:%SZ\" >> split.log" + +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Defines the output directory -# output_dir="/atlasgpfs01/usatlas/data/jroblesgo/benchmarks/${curr_time}/TRUTH3_el9_batch" -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_el9_batch/${curr_time}" +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_el9_batch/${start_time}" # Creates the output directory mkdir -p "${output_dir}" @@ -30,6 +38,8 @@ mkdir -p "${output_dir}" hostname >> split.log du DAOD_TRUTH3.TRUTH3.root >> split.log +append_benchmark log.Derivation "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" + # Moves the log file to the output directory mv log.Derivation "${output_dir}" mv split.log "${output_dir}" diff --git a/TRUTH3/BNL/EL9/truth3_el.sub b/TRUTH3/BNL/EL9/truth3_el.sub index 8f6c313c..a5629287 100644 --- a/TRUTH3/BNL/EL9/truth3_el.sub +++ b/TRUTH3/BNL/EL9/truth3_el.sub @@ -1,6 +1,5 @@ Universe = vanilla - Output = /usatlas/u/qlei/batch_output_files/truth3/el/myjob.$(Cluster).$(Process).out Error = /usatlas/u/qlei/batch_output_files/truth3/el/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/truth3/el/myjob.$(Cluster).$(Process).log diff --git a/TRUTH3/BNL/Native/run_truth3_native_batch.sh b/TRUTH3/BNL/Native/run_truth3_native_batch.sh index d10c617e..97936eb5 100755 --- a/TRUTH3/BNL/Native/run_truth3_native_batch.sh +++ b/TRUTH3/BNL/Native/run_truth3_native_batch.sh @@ -1,10 +1,16 @@ #!/bin/bash -curr_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") +# shellcheck disable=SC1091 +source /usatlas/u/qlei/AF-Benchmarking/parsing/utils/benchmark_utils.sh + +# current time used for log file storage +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Copying input files to working directory cp -r ~/AF-Benchmarking/TRUTH3/EVNT.root . +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Sets up our environment export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # shellcheck disable=SC1091 @@ -13,21 +19,22 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh # Sets the Athena version we want asetup Athena,24.0.53,here +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Appends time before Derivation_tf.py to log file date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log +/usr/bin/time -v Derivation_tf.py --CA True --inputEVNTFile EVNT.root --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log -Derivation_tf.py --CA True --inputEVNTFile EVNT.root --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log +cat pipe_file.log >> log.Derivation # Appends time after Derivation_tf.py to a log file date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log -# current time used for log file storage - +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Defines the output directory -output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_native_batch/${curr_time}" - +output_dir="/atlasgpfs01/usatlas/data/qlei/logs/TRUTH3_native_batch/${start_time}" # Creates the output directory mkdir -p "${output_dir}" @@ -36,6 +43,8 @@ mkdir -p "${output_dir}" hostname >> split.log du DAOD_TRUTH3.TRUTH3.root >> split.log +append_benchmark log.Derivation "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" "truth_v" + # Moves the log file to the output directory mv log.Derivation "${output_dir}" mv split.log "${output_dir}" diff --git a/TRUTH3/BNL/Native/truth3_native.sub b/TRUTH3/BNL/Native/truth3_native.sub index 710ad981..d0fa0d84 100644 --- a/TRUTH3/BNL/Native/truth3_native.sub +++ b/TRUTH3/BNL/Native/truth3_native.sub @@ -1,6 +1,5 @@ Universe = vanilla - Output = /usatlas/u/qlei/batch_output_files/truth3/native/myjob.$(Cluster).$(Process).out Error = /usatlas/u/qlei/batch_output_files/truth3/native/myjob.$(Cluster).$(Process).err Log = /usatlas/u/qlei/batch_output_files/truth3/native/myjob.$(Cluster).$(Process).log diff --git a/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh b/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh index 1e2d4573..ae21abd3 100755 --- a/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh +++ b/TRUTH3/UC/CentOS7/run_truth3_centos7_batch.sh @@ -1,11 +1,15 @@ #!/bin/bash # shellcheck disable=SC1091 +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" # Defines the directory where the input files are stored config_dir="${GITHUB_WORKSPACE}/TRUTH3/EVNT.root" +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Sets up the ATLAS Environment +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # Appends time before Reco_tf.py to log file @@ -17,7 +21,12 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup AthDerivation,21.2.178.0,here && \ - Reco_tf.py --inputEVNTFile ${config_dir} --outputDAODFile=TRUTH3.root --reductionConf TRUTH3 2>&1 | tee pipe_file.log" + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log && \ + /usr/bin/time -v Reco_tf.py --inputEVNTFile ${config_dir} --outputDAODFile=TRUTH3.root --reductionConf TRUTH3 2>&1 | tee pipe_file.log && \ + cat pipe_file.log >> log.EVNTtoDAOD" + +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Obtains and appends the host machine and payload size to the log file { @@ -26,3 +35,5 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c centos7 -r "asetup hostname du DAOD_TRUTH3.TRUTH3.root } >> split.log + +append_benchmark "log.EVNTtoDAOD" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/TRUTH3/UC/EL9/run_truth3_el9_batch.sh b/TRUTH3/UC/EL9/run_truth3_el9_batch.sh index 9392c304..45a9fb6f 100755 --- a/TRUTH3/UC/EL9/run_truth3_el9_batch.sh +++ b/TRUTH3/UC/EL9/run_truth3_el9_batch.sh @@ -1,24 +1,32 @@ #!/bin/bash # shellcheck disable=SC1091 +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" # Defines the directory where the input files are stored config_dir="${GITHUB_WORKSPACE}/TRUTH3/EVNT.root" +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Sets up the environment +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # Appends time before Derivation_tf.py to log file date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log - # Sets up the container: ## -c : used to make a container followed by the OS we want to use ## -m : mounts a specific directory ## -r : precedes the commands we want to run within the container # shellcheck disable=SC1091 source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -r "asetup Athena,24.0.53,here && \ - Derivation_tf.py --CA True --inputEVNTFile ${config_dir} --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log" + echo \"SETUP_COMPLETE=\$(date -u '+%Y-%m-%dT%H:%M:%SZ')\" >> split.log && \ + /usr/bin/time -v Derivation_tf.py --CA True --inputEVNTFile ${config_dir} --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log && \ + cat pipe_file.log >> log.Derivation" + +setup_end=$(grep "^SETUP_COMPLETE=" split.log 2>/dev/null | tail -1 | sed 's/^SETUP_COMPLETE=//') +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Obtains and appends the host machine and payload size to the log file { @@ -27,3 +35,5 @@ source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh -c el9 -r "asetup Athe hostname du DAOD_TRUTH3.TRUTH3.root } >> split.log + +append_benchmark "log.Derivation" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/TRUTH3/UC/Native/run_truth3_native_batch.sh b/TRUTH3/UC/Native/run_truth3_native_batch.sh index 4af2e42a..d906dac4 100755 --- a/TRUTH3/UC/Native/run_truth3_native_batch.sh +++ b/TRUTH3/UC/Native/run_truth3_native_batch.sh @@ -1,10 +1,15 @@ #!/bin/bash # shellcheck disable=SC1091 +source "${GITHUB_WORKSPACE}/parsing/utils/benchmark_utils.sh" # Input files are stored here config_dir="${GITHUB_WORKSPACE}/TRUTH3/EVNT.root" +start_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + +setup_start=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + # Sets up our environment echo "::group::setupATLAS" export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase @@ -18,13 +23,17 @@ date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log # Sets the Athena version we want asetup Athena,24.0.53,here +setup_end=$(date -u "+%Y-%m-%dT%H:%M:%SZ") + echo "::group::TRUTH3 Derivation" -Derivation_tf.py --CA True --inputEVNTFile "${config_dir}" --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log +/usr/bin/time -v Derivation_tf.py --CA True --inputEVNTFile "${config_dir}" --outputDAODFile=TRUTH3.root --formats TRUTH3 2>&1 | tee pipe_file.log +cat pipe_file.log >> log.Derivation echo "::endgroup::" # Appends time after Derivation_tf.py to a log file date -u "+%Y-%m-%dT%H:%M:%SZ" >> split.log +end_time=$(date -u "+%Y-%m-%dT%H:%M:%SZ") # Obtains and appends the host machine and payload size to the log file echo "::group::Collect Metrics" @@ -35,3 +44,5 @@ echo "::group::Collect Metrics" du DAOD_TRUTH3.TRUTH3.root } >> split.log echo "::endgroup::" + +append_benchmark "log.Derivation" "${start_time}" "${end_time}" "${setup_start}" "${setup_end}" diff --git a/parsing/base/parsing_base.py b/parsing/base/parsing_base.py deleted file mode 100644 index a0c9b012..00000000 --- a/parsing/base/parsing_base.py +++ /dev/null @@ -1,100 +0,0 @@ -import json -from pathlib import Path -from collections.abc import Callable -import hashlib -from datetime import datetime, timedelta - - -class ParsingClass: - def __init__(self, log_dir: str, state_file: str = "state/parsed_state.json"): - self.log_dir = Path(log_dir) - self.state_file = Path(state_file) - - # Initializes state - self.state = self._load_state() - - # Register: pattern -> parsing function - self.parsers = {} - - def _load_state(self) -> dict: - if self.state_file.exists(): - with open(self.state) as f: - return json.load(f) - return {"parsed_files": {}} - - def _save_state(self): - self.state_file.parent.mkdir(parents=True, exist_ok=True) - with open(self.state_file, "w") as f: - json.dump(self.state, f, indent=2) - - def _file_hash(self, file_path: Path) -> str: - """Creates file hash to detect any changes""" - hasher = hashlib.md5() - with open(file_path, "rb") as f: - hasher.update(f.read()) - return hasher.hexdigest() - - def _marked_as_parsed(self, file_path: Path, file_hash: str): - self.state["parsed_files"][str(file_path)] = file_hash - self._save_state() - - def _has_been_parsed(self, file_path: Path, file_hash: str) -> bool: - return ( - str(file_path) in self.state["parsed_files"] - and self.state["parsed_files"][str(file_path)] == file_hash - ) - - def register_parsers(self, patterns: str, func: Callable): - """Registers functions for log types""" - self.parsers[patterns] = func - - def _recent_dirs(self, days=7): - cutoff = datetime.now() - timedelta(days=days) - recent = [] - - for d in self.log_dir.iterdir(): - if d.is_dir(): - try: - dt = datetime.strptime(d.name, "%Y.%m.%dT%H") - if dt >= cutoff: - recent.append(d) - except ValueError: - continue - return recent - - def discover_logs(self): - logs = [] - for d in self._recent_dirs(days=7): - for pattern in self.parsers: - logs.extend(d.rglob(pattern)) - return logs - - def parse_all(self): - logs = self.discover_logs() - print(f"Found {len(logs)} logs") - - for log_file in logs: - file_hash = self._file_hash(log_file) - - if self._has_been_parsed(log_file, file_hash): - print(f"[SKIP] Already Parsed: {log_file}") - continue - - print(f"[PARSE] Parsing: {log_file}") - self._parse_file(log_file) - - # Marks files as successful parsing - self._mark_as_parsed(log_file, file_hash) - - def _mark_as_parsed(self, path, file_hash): - """Records that a file has been successfully parsed.""" - self.state["parsed_files"][str(path)] = file_hash - self._save_state() - - def _parse_file(self, log_file: Path): - for pattern, func in self.parsers.items(): - if log_file.match(pattern): - func(log_file) - return - - raise ValueError(f"No parser registered for file: {log_file}") diff --git a/parsing/base_parser.py b/parsing/base_parser.py new file mode 100644 index 00000000..6597e2f6 --- /dev/null +++ b/parsing/base_parser.py @@ -0,0 +1,90 @@ +"""Base parser class for ATLAS benchmark logs. + +This module provides common parsing logic shared across different +benchmark types (TRUTH3, EVNT, etc.). +""" + +import arrow + + +def parse_benchmark_block(file_lines): + """Extract key=value pairs from the LAST === BENCHMARK === block in a log. + + Args: + file_lines: List of lines from the log file + + Returns: + dict: All key=value pairs from the last benchmark block, + or empty dict if no block found + """ + last_block = {} + current_block = {} + in_block = False + + for line in file_lines: + line = line.strip() + if line == "=== BENCHMARK ===": + in_block = True + current_block = {} + continue + if line == "=================" and in_block: + in_block = False + last_block = current_block # keep overwriting — last one wins + continue + if in_block and "=" in line: + key, _, value = line.partition("=") + current_block[key.strip()] = value.strip() + + return last_block + + +def parse_atlas_log(path, log_name="ATLAS"): + """Parse ATLAS benchmark log file for timing information. + + Args: + path: Path to log file + log_name: Name of benchmark for logging (e.g., "TRUTH3", "EVNT") + + Returns: + dict: Parsed timing data with keys: + - submitTime: UTC timestamp in milliseconds + - queueTime: Queue time in seconds + - runTime: Execution time in seconds + - status: Exit status (0 = success) + """ + print(f"[{log_name}] Parsing {path.name}") + + with open(path) as f: + file_lines = f.readlines() + + benchmark = parse_benchmark_block(file_lines) + + if not benchmark: + raise ValueError(f"[{log_name}] No BENCHMARK block found in {path.name}") + + start_dt = arrow.get(benchmark["start_time_utc"]) + end_dt = arrow.get(benchmark["end_time_utc"]) + + if "submit_time_utc" in benchmark: + submit_dt = arrow.get(benchmark["submit_time_utc"]) + submit_time_ms = submit_dt.int_timestamp * 1000 + queue_time = max(0, int((start_dt - submit_dt).total_seconds())) + else: + submit_time_ms = start_dt.int_timestamp * 1000 + queue_time = 0 + + result = { + "submitTime": submit_time_ms, + "queueTime": queue_time, + "runTime": int((end_dt - start_dt).total_seconds()), + "status": int(benchmark.get("exit_status", 0)), + } + + if "setup_start_time_utc" in benchmark and "setup_end_time_utc" in benchmark: + setup_start_dt = arrow.get(benchmark["setup_start_time_utc"]) + setup_end_dt = arrow.get(benchmark["setup_end_time_utc"]) + result["setupTime"] = max( + 0, int((setup_end_dt - setup_start_dt).total_seconds()) + ) + + return result diff --git a/parsing/handlers/__init__.py b/parsing/handlers/__init__.py deleted file mode 100644 index 28435e3a..00000000 --- a/parsing/handlers/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Log parser handlers for different benchmark types.""" diff --git a/parsing/handlers/base_parser.py b/parsing/handlers/base_parser.py deleted file mode 100644 index bdec6f85..00000000 --- a/parsing/handlers/base_parser.py +++ /dev/null @@ -1,92 +0,0 @@ -"""Base parser class for ATLAS benchmark logs. - -This module provides common parsing logic shared across different -benchmark types (TRUTH3, EVNT, etc.). -""" - -import datetime as dt - - -# Month abbreviation to number mapping -MONTH_DICT = { - "Jan": "01", - "Feb": "02", - "Mar": "03", - "Apr": "04", - "May": "05", - "Jun": "06", - "Jul": "07", - "Aug": "08", - "Sep": "09", - "Oct": "10", - "Nov": "11", - "Dec": "12", -} - - -def parse_atlas_log(path, log_name="ATLAS"): - """Parse ATLAS benchmark log file for timing information. - - Args: - path: Path to log file - log_name: Name of benchmark for logging (e.g., "TRUTH3", "EVNT") - - Returns: - dict: Parsed timing data with keys: - - submitTime: UTC timestamp in milliseconds - - queueTime: Queue time in seconds - - runTime: Execution time in seconds - - status: Exit status (0 = success) - """ - print(f"[{log_name}] Parsing {path.name}") - - # Read log file - with open(path) as f: - file_lines = f.readlines() - N = len(file_lines) - - # Parse start datetime from first line - start_datetime_list = file_lines[0].split(" ") - end_time_list = file_lines[N - 1].split(" ") - - start_time = start_datetime_list[0] - month = int(MONTH_DICT[start_datetime_list[2]]) - year = int(start_datetime_list[-1]) - - # Handle different date formats (with/without day of week) - if len(start_datetime_list) == 8: - day = int(start_datetime_list[4]) - submit_time = dt.datetime.strptime(start_datetime_list[5], "%H:%M:%S").time() - else: - day = int(start_datetime_list[3]) - submit_time = dt.datetime.strptime(start_datetime_list[4], "%H:%M:%S").time() - - # Build datetime objects (explicitly UTC to avoid local timezone interpretation) - start_date_object = dt.date(year, month, day) - start_time = dt.datetime.strptime(start_datetime_list[0], "%H:%M:%S").time() - start_datetime_object = dt.datetime.combine( - start_date_object, start_time, tzinfo=dt.timezone.utc - ) - utc_timestamp = int(start_datetime_object.timestamp()) * 1000 - - # Calculate queue time - submit_datetime_object = dt.datetime.combine( - start_date_object, submit_time, tzinfo=dt.timezone.utc - ) - queue_time = int((start_datetime_object - submit_datetime_object).total_seconds()) - - # Calculate run time - end_time = dt.datetime.strptime(end_time_list[0], "%H:%M:%S").time() - end_datetime_object = dt.datetime.combine( - start_date_object, end_time, tzinfo=dt.timezone.utc - ) - run_time = int((end_datetime_object - start_datetime_object).total_seconds()) - - status = 0 - - return { - "submitTime": utc_timestamp, - "queueTime": queue_time, - "runTime": run_time, - "status": status, - } diff --git a/parsing/handlers/coffea_parser.py b/parsing/handlers/coffea_parser.py deleted file mode 100644 index 13921df7..00000000 --- a/parsing/handlers/coffea_parser.py +++ /dev/null @@ -1,66 +0,0 @@ -import datetime as dt - - -def parse_coffea_log(path): - """Parse Coffea analysis log file for timing information. - - Args: - path: Path to coffea_hist.log file - - Returns: - dict: Parsed timing data with keys: - - submitTime: UTC timestamp in milliseconds - - queueTime: Queue time in seconds (always 0) - - runTime: Execution time in seconds - - frequency: Processing frequency in kHz - - status: Exit status (0 = success) - """ - print(f"[Coffea NTuple->Hist] Parsing {path.name}") - - with open(path) as f: - file_lines = f.readlines() - - # Parse execution time and frequency from line 2 - # Format: "... execution time: 205.45 s ( 89.10 kHz)" - line_list = file_lines[1].split(" ") - run_time = round(float(line_list[3])) - frequency = round(float(line_list[-2])) - - # Parse UTC timestamp from end of file - # Format: "start_time_utc=2025-12-17T00:48:07.953454Z" - start_time_line = None - - for line in file_lines: - if line.startswith("start_time_utc="): - start_time_line = line.strip() - break - - if not start_time_line: - raise ValueError("No start_time_utc found in log file") - - # Extract timestamp string after the = sign - start_time_str = start_time_line.split("=")[1] - - # Parse ISO 8601 format with Z suffix (UTC) - # Format: 2025-12-17T00:48:07.953454Z - start_dt = dt.datetime.fromisoformat(start_time_str.rstrip("Z")).replace( - tzinfo=dt.timezone.utc - ) - utc_timestamp = int(start_dt.timestamp() * 1000) - - status = 0 - - dicti = { - "submitTime": utc_timestamp, - "queueTime": 0, - "runTime": run_time, - "frequency": frequency, - "status": status, - } - - return dicti - - -# Registers this parsing script with the Class -def register(parser): - parser.register_parsers("coffea_hist.log", parse_coffea_log) diff --git a/parsing/handlers/eventloop_parser.py b/parsing/handlers/eventloop_parser.py deleted file mode 100644 index e7dcd3e2..00000000 --- a/parsing/handlers/eventloop_parser.py +++ /dev/null @@ -1,37 +0,0 @@ -import datetime as dt - - -def parse_eventloop_log(path): - print(f"[EventLoop] Parsing {path.name}") - - with open(path) as f: - lines = f.readlines() - - # EventLoop benchmark info - block = lines[-8:] - - start_datetime = block[1].split("=", 1)[1].strip() - run_time = int(float(block[3].split("=", 1)[1].strip())) - frequency = int(float(block[-3].split("=", 1)[1].strip())) - - start_dt = dt.datetime.fromisoformat(start_datetime.rstrip("Z")).replace( - tzinfo=dt.timezone.utc - ) - utc_timestamp = int(start_dt.timestamp() * 1000) - - status = 0 - - dicti = { - "submitTime": utc_timestamp, - "queueTime": 0, - "runTime": run_time, - "frequency": frequency, - "status": status, - } - - return dicti - - -# Registers this parsing script with the Class -def register(parser): - parser.register_parsers("eventloop_arrays.log", parse_eventloop_log) diff --git a/parsing/handlers/evnt_parser.py b/parsing/handlers/evnt_parser.py deleted file mode 100644 index cd2ad17b..00000000 --- a/parsing/handlers/evnt_parser.py +++ /dev/null @@ -1,23 +0,0 @@ -"""EVNT generation log parser. - -Parses logs from ATLAS Monte Carlo event generation jobs to extract timing information. -""" - -from parsing.handlers.base_parser import parse_atlas_log - - -def parse_evnt_log(path): - """Parse EVNT generation log file. - - Args: - path: Path to log.generate file - - Returns: - dict: Parsed timing data - """ - return parse_atlas_log(path, log_name="EVNT") - - -# Registers this parsing script with the Class -def register(parser): - parser.register_parsers("log.generate", parse_evnt_log) diff --git a/parsing/handlers/fastframes_parser.py b/parsing/handlers/fastframes_parser.py deleted file mode 100644 index ff9c48a2..00000000 --- a/parsing/handlers/fastframes_parser.py +++ /dev/null @@ -1,56 +0,0 @@ -import datetime as dt - -from parsing.utils.text_utils import strip_ansi - -date_format = "%Y-%m-%d %H:%M:%S" - - -def elapsed_to_seconds(s): - s = s.rstrip("m") - minutes, seconds = map(int, s.split(":")) - return minutes * 60 + seconds - - -def parse_fastframes_log(path): - print(f"[FastFrames] Parsing {path.name}") - with open(path) as f: - file_lines = f.readlines() - N = len(file_lines) - cleaned_lines = [strip_ansi(lines) for lines in file_lines[N - 2 : N]] - line1 = cleaned_lines[0].split(" ") # processed/total events, elapsed time - elapsed_time = line1[3] - processed_events = int(line1[13]) - if processed_events == 18304905: - status = 0 - else: - status = 1 - - line2 = cleaned_lines[1].split(" ") - date = line2[13] - time = line2[14] - - combined = f"{date} {time}" - - # It was submitted to the batch so it's in UTC TimeZone already - dt_obj = dt.datetime.strptime(combined, "%d-%m-%Y %H:%M:%S").replace( - tzinfo=dt.timezone.utc - ) - utc_timestamp = int(dt_obj.timestamp() * 1000) - - run_time = int(elapsed_to_seconds(elapsed_time)) - frequency = int((processed_events / run_time) / 1000) - - dicti = { - "submitTime": utc_timestamp, - "queueTime": 0, - "runTime": run_time, - "frequency": frequency, - "status": status, - } - - return dicti - - -# Registers this parsing script with the Class -def register(parser): - parser.register_parsers("fastframes.log", parse_fastframes_log) diff --git a/parsing/handlers/rucio_parser.py b/parsing/handlers/rucio_parser.py deleted file mode 100644 index d4eacab4..00000000 --- a/parsing/handlers/rucio_parser.py +++ /dev/null @@ -1,65 +0,0 @@ -from collections import deque -import datetime as dt - -from parsing.utils.text_utils import strip_ansi - -date_format = "%Y-%m-%d %H:%M:%S" - - -def parse_rucio_log(path): - print(f"Rucio Parsing {path.name}") - - first_line = None - last_lines = deque(maxlen=12) - - with open(path) as f: - for line in f: - if "Processing 1 item(s) for input" in line and first_line is None: - first_line = line - last_lines.append(line) - - payload_line = last_lines[-1] - last_line = last_lines[0] if len(last_lines) == 12 else None - - first_line = strip_ansi(first_line).split(" ") - start_date_string = first_line[0] - start_time_string = first_line[1].split(",")[0] - - last_line = strip_ansi(last_line).split(" ") - end_date_string = last_line[0] - end_time_string = last_line[1].split(",")[0] - - # Obtaining the payload for status check; casted as int - payload = int(payload_line.split("\t")[0]) - if payload != 0: - status = 0 - else: - status = 1 - - # Creating start and end time objects (explicitly UTC to avoid local timezone interpretation) - start_datetime_string = start_date_string + " " + start_time_string - start_dt = dt.datetime.strptime(start_datetime_string, date_format).replace( - tzinfo=dt.timezone.utc - ) - end_datetime_string = end_date_string + " " + end_time_string - end_dt = dt.datetime.strptime(end_datetime_string, date_format).replace( - tzinfo=dt.timezone.utc - ) - - # Obtains timestamp and run_time - utc_timestamp = int(start_dt.timestamp()) * 1000 - run_time = int((end_dt - start_dt).total_seconds()) - - dicti = { - "submitTime": utc_timestamp, - "queueTime": 0, - "runTime": run_time, - "status": status, - } - - return dicti - - -# Registers this parsing script with the Class -def register(parser): - parser.register_parsers("rucio.log", parse_rucio_log) diff --git a/parsing/handlers/truth3_parser.py b/parsing/handlers/truth3_parser.py deleted file mode 100644 index e566e260..00000000 --- a/parsing/handlers/truth3_parser.py +++ /dev/null @@ -1,23 +0,0 @@ -"""TRUTH3 derivation log parser. - -Parses logs from ATLAS TRUTH3 derivation jobs to extract timing information. -""" - -from parsing.handlers.base_parser import parse_atlas_log - - -def parse_truth3_log(path): - """Parse TRUTH3 derivation log file. - - Args: - path: Path to log.EVNTtoDAOD or log.Derivation file - - Returns: - dict: Parsed timing data - """ - return parse_atlas_log(path, log_name="TRUTH3") - - -# Registers this parsing script with the Class -def register(parser): - parser.register_parsers("log.EVNTtoDAOD", parse_truth3_log) diff --git a/parsing/schema/payload.schema.json b/parsing/schema/payload.schema.json index 00544257..d1a743b7 100644 --- a/parsing/schema/payload.schema.json +++ b/parsing/schema/payload.schema.json @@ -55,6 +55,11 @@ "description": "Frequency or event processing rate (kHz)", "minimum": 0 }, + "setupTime": { + "type": "integer", + "description": "Environment setup time in seconds (setupATLAS + asetup/lsetup)", + "minimum": 0 + }, "status": { "type": "integer", "description": "Job exit status (0 = success, non-zero = failure)", diff --git a/parsing/scripts/ci_parse.py b/parsing/scripts/ci_parse.py index 90f70a65..c6056750 100755 --- a/parsing/scripts/ci_parse.py +++ b/parsing/scripts/ci_parse.py @@ -13,14 +13,7 @@ from rich.panel import Panel from rich.syntax import Syntax -from parsing.handlers import ( - rucio_parser, - coffea_parser, - fastframes_parser, - truth3_parser, - evnt_parser, - eventloop_parser, -) +from parsing import base_parser # Initialize rich console console = Console() @@ -92,21 +85,7 @@ def parse_log( f"[bold cyan]Payload file:[/bold cyan] {payload_file} ({payload_size} bytes)" ) - # Parse based on log type - if log_type == "rucio": - data = rucio_parser.parse_rucio_log(log_path) - elif log_type == "evnt": - data = evnt_parser.parse_evnt_log(log_path) - elif log_type == "truth3": - data = truth3_parser.parse_truth3_log(log_path) - elif log_type == "coffea": - data = coffea_parser.parse_coffea_log(log_path) - elif log_type == "eventloop": - data = eventloop_parser.parse_eventloop_log(log_path) - elif log_type == "fastframes": - data = fastframes_parser.parse_fastframes_log(log_path) - else: - raise ValueError(f"Unknown log type: {log_type}") + data = base_parser.parse_atlas_log(log_path) # Add common fields to all parsed data data["job"] = job diff --git a/parsing/tests/test_parsers.py b/parsing/tests/test_parsers.py index 8863b1bc..22bd29bc 100644 --- a/parsing/tests/test_parsers.py +++ b/parsing/tests/test_parsers.py @@ -1,101 +1,201 @@ -"""Test suite for benchmark log parsers. - -This module tests all parsers against example logs to ensure -they correctly extract timing and status information. These tests -serve as a regression suite when refactoring parser code. -""" - -from pathlib import Path - -# Import all parsers -from parsing.handlers.truth3_parser import parse_truth3_log -from parsing.handlers.evnt_parser import parse_evnt_log -from parsing.handlers.rucio_parser import parse_rucio_log -from parsing.handlers.coffea_parser import parse_coffea_log -from parsing.handlers.fastframes_parser import parse_fastframes_log - -# Path to example logs directory -EXAMPLE_LOGS = Path(__file__).parent.parent / "example-logs" - - -class TestTruth3Parser: - """Tests for TRUTH3 derivation log parser.""" - - def test_parse_truth3_example_log(self): - """Test parsing of example TRUTH3 derivation log.""" - log_file = EXAMPLE_LOGS / "log.Derivation" - result = parse_truth3_log(log_file) - - # Validate actual values from example log (not just key existence) - # These expected values come from running parser on example log - # Timestamps are in UTC (logs are from UTC timezone systems) - assert result["submitTime"] == 1765216819000, "Submit time mismatch" - assert result["queueTime"] == 0, "Queue time should be 0" - assert result["runTime"] == 48, "Runtime should be 48 seconds" - assert result["status"] == 0, "Status should be 0 (success)" - - -class TestEvntParser: - """Tests for EVNT generation log parser.""" - - def test_parse_evnt_example_log(self): - """Test parsing of example EVNT generation log.""" - log_file = EXAMPLE_LOGS / "log.generate" - result = parse_evnt_log(log_file) - - # Validate actual values to catch calculation regressions - # Timestamps are in UTC (logs are from UTC timezone systems) - assert result["submitTime"] == 1765216848000, "Submit time mismatch" - assert result["queueTime"] == 0, "Queue time should be 0" - assert result["runTime"] == 2418, "Runtime should be 2418 seconds" - assert result["status"] == 0, "Status should be 0 (success)" - - -class TestRucioParser: - """Tests for Rucio data download log parser.""" - - def test_parse_rucio_example_log(self): - """Test parsing of example Rucio download log.""" - log_file = EXAMPLE_LOGS / "rucio.log" - result = parse_rucio_log(log_file) - - # Check actual parsed values - # Timestamps are in UTC (logs are from UTC timezone systems) - assert result["submitTime"] == 1765216822000, "Submit time mismatch" - assert result["queueTime"] == 0, "Queue time should be 0" - assert result["runTime"] == 31, "Runtime should be 31 seconds" - assert result["status"] == 0, "Status should be 0 (success)" - - -class TestFastFramesParser: - """Tests for FastFrames analysis log parser.""" - - def test_parse_fastframes_example_log(self): - """Test parsing of example FastFrames log.""" - log_file = EXAMPLE_LOGS / "fastframes.log" - result = parse_fastframes_log(log_file) - - # Check actual parsed values including frequency - # Timestamps are in UTC (logs are from UTC timezone systems) - assert result["submitTime"] == 1765217167000, "Submit time mismatch" - assert result["queueTime"] == 0, "Queue time should be 0" - assert result["runTime"] == 345, "Runtime should be 345 seconds" - assert result["frequency"] == 53, "Frequency should be 53" - assert result["status"] == 0, "Status should be 0 (success)" - - -class TestCoffeaParser: - """Tests for Coffea analysis log parser.""" - - def test_parse_coffea_example_log(self): - """Test parsing of example Coffea log.""" - log_file = EXAMPLE_LOGS / "coffea_hist.log" - result = parse_coffea_log(log_file) - - # Check actual parsed values including frequency - # Timestamps are in UTC (parsed from ISO 8601 format with Z suffix) - assert result["submitTime"] == 1765932487953, "Submit time mismatch" - assert result["queueTime"] == 0, "Queue time should be 0" - assert result["runTime"] == 205, "Runtime should be 205 seconds" - assert result["frequency"] == 89, "Frequency should be 89 kHz" - assert result["status"] == 0, "Status should be 0 (success)" +"""Test suite for benchmark log parsers.""" + +import pytest + +from parsing.base_parser import parse_atlas_log, parse_benchmark_block + + +class TestParseBenchmarkBlock: + def test_parses_basic_block(self): + lines = [ + "some log line\n", + "=== BENCHMARK ===\n", + "start_time_utc=2025-12-08T18:00:19Z\n", + "end_time_utc=2025-12-08T18:01:07Z\n", + "exit_status=0\n", + "=================\n", + ] + result = parse_benchmark_block(lines) + assert result == { + "start_time_utc": "2025-12-08T18:00:19Z", + "end_time_utc": "2025-12-08T18:01:07Z", + "exit_status": "0", + } + + def test_returns_last_block_when_multiple(self): + lines = [ + "=== BENCHMARK ===\n", + "start_time_utc=2025-12-08T18:00:00Z\n", + "end_time_utc=2025-12-08T18:00:30Z\n", + "exit_status=1\n", + "=================\n", + "=== BENCHMARK ===\n", + "start_time_utc=2025-12-08T18:01:00Z\n", + "end_time_utc=2025-12-08T18:01:30Z\n", + "exit_status=0\n", + "=================\n", + ] + result = parse_benchmark_block(lines) + assert result["start_time_utc"] == "2025-12-08T18:01:00Z" + assert result["exit_status"] == "0" + + def test_returns_empty_dict_if_no_block(self): + lines = ["some line\n", "another line\n"] + assert parse_benchmark_block(lines) == {} + + def test_preserves_extra_fields(self): + lines = [ + "=== BENCHMARK ===\n", + "start_time_utc=2025-12-08T18:00:00Z\n", + "end_time_utc=2025-12-08T18:00:30Z\n", + "exit_status=0\n", + "user_time_sec=25.5\n", + "max_rss_kb=512000\n", + "=================\n", + ] + result = parse_benchmark_block(lines) + assert result["user_time_sec"] == "25.5" + assert result["max_rss_kb"] == "512000" + + +class TestParseAtlasLog: + def test_parses_timing_fields(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "some header\n" + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:48Z\n" + "end_time_utc=2025-12-08T18:41:06Z\n" + "exit_status=0\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["submitTime"] == 1765216848000 + assert result["queueTime"] == 0 + assert result["runTime"] == 2418 + assert result["status"] == 0 + + def test_nonzero_exit_status(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:00:30Z\n" + "exit_status=127\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["status"] == 127 + + def test_missing_exit_status_defaults_to_zero(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:00:30Z\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["status"] == 0 + + def test_raises_if_no_benchmark_block(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text("some log content without a benchmark block\n") + with pytest.raises(ValueError, match="No BENCHMARK block"): + parse_atlas_log(log_file) + + def test_uses_last_benchmark_block(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:00:10Z\n" + "exit_status=1\n" + "=================\n" + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:01:00Z\n" + "end_time_utc=2025-12-08T18:01:45Z\n" + "exit_status=0\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["submitTime"] == 1765216860000 + assert result["runTime"] == 45 + assert result["status"] == 0 + + def test_submit_time_utc_sets_submit_time_and_queue_time(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "submit_time_utc=2025-12-08T18:00:00Z\n" + "start_time_utc=2025-12-08T18:05:00Z\n" + "end_time_utc=2025-12-08T18:45:00Z\n" + "exit_status=0\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["submitTime"] == 1765216800000 # submit_time_utc in ms + assert result["queueTime"] == 300 # 5 minutes in queue + assert result["runTime"] == 2400 # 40 minutes running + + def test_missing_submit_time_uses_start_time(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:00:30Z\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["submitTime"] == 1765216800000 + assert result["queueTime"] == 0 + + def test_setup_times_produce_setup_time(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:30:00Z\n" + "setup_start_time_utc=2025-12-08T18:00:00Z\n" + "setup_end_time_utc=2025-12-08T18:02:30Z\n" + "exit_status=0\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["setupTime"] == 150 # 2.5 minutes in seconds + + def test_missing_setup_times_omits_setup_time(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:00:30Z\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert "setupTime" not in result + + def test_clock_skew_on_queue_time_clamps_to_zero(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "submit_time_utc=2025-12-08T18:00:02Z\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:00:30Z\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["queueTime"] == 0 + + def test_clock_skew_on_setup_time_clamps_to_zero(self, tmp_path): + log_file = tmp_path / "log.generate" + log_file.write_text( + "=== BENCHMARK ===\n" + "start_time_utc=2025-12-08T18:00:00Z\n" + "end_time_utc=2025-12-08T18:30:00Z\n" + "setup_start_time_utc=2025-12-08T18:00:01Z\n" + "setup_end_time_utc=2025-12-08T18:00:00Z\n" + "=================\n" + ) + result = parse_atlas_log(log_file) + assert result["setupTime"] == 0 diff --git a/parsing/utils/benchmark_utils.sh b/parsing/utils/benchmark_utils.sh new file mode 100644 index 00000000..be151ac9 --- /dev/null +++ b/parsing/utils/benchmark_utils.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# benchmark_utils.sh +# Shared utilities for ATLAS benchmark scripts + +# Parse /usr/bin/time -v output from a log file and extract key metrics +# Usage: extract_time_metrics +extract_time_metrics() { + local log_file=$1 + + grep -E "User time|System time|Percent of CPU|Elapsed \(wall clock\)|Maximum resident set size|Major \(requiring I/O\)|Minor \(reclaiming|Voluntary context|Involuntary context|Exit status" "${log_file}" | \ + awk -F': ' ' + /User time/ { user=$2 } + /System time/ { sys=$2 } + /Percent of CPU/ { cpu=$2; gsub(/%/,"",cpu) } + /Elapsed \(wall clock\)/ { elapsed=$2 } + /Maximum resident set/ { maxrss=$2 } + /Major.*page faults/ { majflt=$2 } + /Minor.*page faults/ { minflt=$2 } + /Voluntary context/ { vcswitch=$2 } + /Involuntary context/ { ivcswitch=$2 } + /Exit status/ { exit_status=$2 } + END { + print "user_time_sec=" user + print "sys_time_sec=" sys + print "cpu_percent=" cpu + print "elapsed_time=" elapsed + print "max_rss_kb=" maxrss + print "major_page_faults=" majflt + print "minor_page_faults=" minflt + print "voluntary_ctx_switches=" vcswitch + print "involuntary_ctx_switches=" ivcswitch + print "exit_status=" exit_status + } + ' +} + +# Parse rucio download metrics from a rucio.log file +# Usage: extract_rucio_metrics +extract_rucio_metrics() { + local log_file=$1 + + awk ' + /Total files \(DID\)/ { total_did=$NF } + /Total files \(filtered\)/ { total_filtered=$NF } + /Downloaded files/ { downloaded=$NF } + /Files already found locally/ { already_local=$NF } + /Files that cannot be downloaded/ { failed=$NF } + /^[0-9]+[[:space:]]/ { du_kb=$1 } + END { + print "rucio_total_files_did=" total_did + print "rucio_total_files_filtered=" total_filtered + print "rucio_downloaded_files=" downloaded + print "rucio_already_local=" already_local + print "rucio_failed_files=" failed + print "rucio_du_kb=" du_kb + } + ' "${log_file}" +} + +# Append standardized benchmark block to a log file +# Usage: append_benchmark [setup_start] [setup_end] [mode] +# Reads SUBMIT_TIME from environment if set (injected by cron/submit scripts via HTCondor). +append_benchmark() { + local log_file=$1 + local start_time=$2 + local end_time=$3 + local setup_start=${4:-} + local setup_end=${5:-} + local mode=${6:-time_v} + + local extra_metrics="" + case "${mode}" in + time_v) extra_metrics=$(extract_time_metrics "${log_file}") ;; + rucio) extra_metrics=$(extract_rucio_metrics "${log_file}") ;; + none) ;; + esac + + { + echo "=== BENCHMARK ===" + [[ -n "${SUBMIT_TIME:-}" ]] && echo "submit_time_utc=${SUBMIT_TIME}" + echo "start_time_utc=${start_time}" + echo "end_time_utc=${end_time}" + [[ -n "${setup_start}" ]] && echo "setup_start_time_utc=${setup_start}" + [[ -n "${setup_end}" ]] && echo "setup_end_time_utc=${setup_end}" + [[ -n "${extra_metrics}" ]] && printf '%s\n' "${extra_metrics}" + echo "=================" + } >> "${log_file}" +} diff --git a/parsing/utils/text_utils.py b/parsing/utils/text_utils.py deleted file mode 100644 index 30fe0e9d..00000000 --- a/parsing/utils/text_utils.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Text processing utilities for log parsing. - -This module provides common text manipulation functions used across -different log parsers. -""" - -import re - - -# Regex pattern to match ANSI escape sequences (color codes, etc.) -ANSI_ESCAPE = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]") - - -def strip_ansi(text): - """Remove ANSI escape sequences from text. - - ANSI escape sequences are used for terminal colors and formatting. - This function removes them to get clean text for parsing. - - Args: - text (str): Text potentially containing ANSI codes - - Returns: - str: Text with ANSI codes removed - - Example: - >>> strip_ansi("\\x1B[32mGreen text\\x1B[0m") - 'Green text' - """ - return ANSI_ESCAPE.sub("", text)