Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@
import cmsdb.campaigns.run3_2022_postEE_nano_tau_skim_2025_v1.bkgs
import cmsdb.campaigns.run3_2022_postEE_nano_tau_skim_2025_v1.data
import cmsdb.campaigns.run3_2022_postEE_nano_tau_skim_2025_v1.signal

import cmsdb.campaigns.run3_2022_postEE_nano_tau_skim_2025_v1.cp_signal
142 changes: 26 additions & 116 deletions cmsdb/campaigns/run3_2022_postEE_nano_tau_skim_2025_v1/bkgs.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# coding: utf-8

"""
Signal datasets for the CP analysis from the 2022 post-EE campaign.
"""

import cmsdb.processes as procs
from cmsdb.campaigns.run3_2022_postEE_nano_tau_skim_2025_v1 import campaign_run3_2022_postEE_nano_tau_skim_2025_v1 as cpn # TODO: adjust if needed

import re
from collections import OrderedDict

def _base_name(name: str) -> str:
m = re.match(r'^(.*)_ext\d+$', name)
return m.group(1) if m else name

def _ext_number(s: str) -> int:
m = re.search(r'_ext(\d+)$', s)
return int(m.group(1)) if m else 0

def _key_sort_key(key: str):
    """Return a sort key that orders un-suffixed entries before ``_ext`` ones.

    Entries without an ``_ext<N>`` suffix map to ``(0, 0)``; suffixed entries
    map to ``(1, N)`` so they follow in ascending extension number.
    """
    ext = _ext_number(key)
    if ext == 0:
        return (0, 0)
    return (1, ext)

def add_merged_datasets(dataset_rows, cpn, procs):
    """Merge ``*_ext<N>`` rows into their base sample and register each base once.

    Args:
        dataset_rows: iterable of ``(name, key_or_keys, n_evt, n_files, pid, proc)``
            tuples. ``key_or_keys`` is a string or a list/tuple of strings;
            ``proc`` is the attribute name looked up on *procs*.
        cpn: object whose ``add_dataset`` method is called once per base name.
        procs: object providing the process attributes named by ``proc``.

    Raises:
        ValueError: if rows sharing a base name disagree on ``proc``.
        TypeError: if any key is not a string.
    """
    merged = {}  # base name -> accumulated dataset info
    for name, key, n_evt, n_files, pid, proc in dataset_rows:
        base = _base_name(name)
        if base not in merged:
            merged[base] = {
                "name": base,
                "proc": proc,
                "id": None,
                "keys": OrderedDict(),  # ordered, duplicate-free key collection
                "n_events": 0,
                "n_files": 0,
            }
        entry = merged[base]

        if entry["proc"] != proc:
            raise ValueError(f"Process mismatch for {base}: {entry['proc']} vs {proc}")

        # the id of a non-ext row always wins; an ext row only supplies the id
        # as long as none has been recorded yet
        if entry["id"] is None or not re.search(r'_ext\d+$', name):
            entry["id"] = pid

        # a single string and a list/tuple of strings are both accepted
        if isinstance(key, (list, tuple)):
            incoming = key
        else:
            incoming = [key]
        for k in incoming:
            if not isinstance(k, str):
                raise TypeError(f"key must be a string, got {type(k).__name__}: {k}")
            entry["keys"].setdefault(k, True)

        entry["n_events"] += int(n_evt)
        entry["n_files"] += int(n_files)

    # one registration per base sample; keys ordered base first, then ext1, ext2, ...
    for entry in merged.values():
        ordered_keys = sorted(entry["keys"], key=_key_sort_key)
        cpn.add_dataset(
            name=entry["name"],
            id=entry["id"],
            is_data=False,
            processes=[getattr(procs, entry["proc"])],
            keys=ordered_keys,
            n_files=entry["n_files"],
            n_events=entry["n_events"],
        )

# ---- dataset table: one row per sample, as (name, key, n_evt, n_files, pid, proc) ----

dataset_rows = [
    # ggH production
    ("h_ggf_htt_sm_prod_sm_filtered", ["/GluGluHto2Tau_UncorrelatedDecay_SM_Filtered_ProdAndDecay"], 19599725, 49, 22100000, "h_ggf_htt_sm_prod_sm"),
    ("h_ggf_htt_sm_prod_cpo_filtered", ["/GluGluHto2Tau_UncorrelatedDecay_CPodd_Filtered_ProdAndDecay"], 21495773, 54, 22100010, "h_ggf_htt_sm_prod_cpo"),
    ("h_ggf_htt_sm_prod_mm_filtered", ["/GluGluHto2Tau_UncorrelatedDecay_MM_Filtered_ProdAndDecay"], 20689379, 52, 22100020, "h_ggf_htt_sm_prod_mm"),
    # VBF, ZH and W+/-H production
    ("h_vbf_htt_sm_filtered", ["/VBFHto2Tau_UncorrelatedDecay_Filtered"], 14552639, 35, 22100030, "h_vbf_htt_sm"),
    ("zh_htt_sm_filtered", ["/ZHto2Tau_UncorrelatedDecay_Filtered"], 1863291, 6, 22100040, "zh_htt_sm"),
    ("wph_htt_sm_filtered", ["/WplusHto2Tau_UncorrelatedDecay_Filtered"], 2025321, 6, 22100050, "wph_htt_sm"),
    ("wmh_htt_sm_filtered", ["/WminusHto2Tau_UncorrelatedDecay_Filtered"], 1480135, 4, 22100060, "wmh_htt_sm"),
]

# Each row is kept as-is and followed by three variants in which 'htt_sm' is
# replaced in both the dataset name and the process name. The variants reuse
# the row's keys and event/file counts; their ids are pid + 1, pid + 2, pid + 3.
dataset_rows_cp = []
for name, key, n_evt, n_files, pid, proc in dataset_rows:
    dataset_rows_cp.append((name, key, n_evt, n_files, pid, proc))
    for offset, the_cp_var in enumerate(['htt_mm', 'htt_cpo', 'htt_flat'], start=1):
        dataset_rows_cp.append((
            name.replace('htt_sm', the_cp_var),
            key,
            n_evt,
            n_files,
            pid + offset,
            proc.replace('htt_sm', the_cp_var),
        ))

add_merged_datasets(dataset_rows_cp, cpn, procs)
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"year": 2022,
"version": 14,
"tag": "postEE",
"postfix" : "EE",
"custom": {
"name": "run3_2022_postEE_v2_nano_tau_v14",
"creator": "desy",
Expand Down
2 changes: 1 addition & 1 deletion cmsdb/campaigns/run3_2022_postEE_v2_nano_tau_v14/ewk.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
name="dy_lep_madgraph",
id=220154049,
# is_mc=True,
processes=[procs.dy_lep], #,procs.dy_z2ee,procs.dy_z2mumu,procs.dy_z2tautau],
processes=[procs.dy_lep],
keys=["/DYto2L_M_50_madgraphMLM","/DYto2L_M_50_madgraphMLM_ext1",],
n_files=464,
n_events=494841164,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@
import cmsdb.campaigns.run3_2022_preEE_nano_tau_skim_2025_v1.data
import cmsdb.campaigns.run3_2022_preEE_nano_tau_skim_2025_v1.bkgs
import cmsdb.campaigns.run3_2022_preEE_nano_tau_skim_2025_v1.signal
import cmsdb.campaigns.run3_2022_preEE_nano_tau_skim_2025_v1.cp_signal

Loading
Loading