From d9fea5c87fd687e4804965f211ed7fde3a23250f Mon Sep 17 00:00:00 2001 From: juvanden Date: Thu, 6 Feb 2025 15:18:14 +0100 Subject: [PATCH 1/3] remove the NaN values in scale and pdf weight columns when counting the number of weights. Required as some events have all weights set to NaN. --- columnflow/production/cms/pdf.py | 2 +- columnflow/production/cms/scale.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/columnflow/production/cms/pdf.py b/columnflow/production/cms/pdf.py index 31946283f..de94651b8 100644 --- a/columnflow/production/cms/pdf.py +++ b/columnflow/production/cms/pdf.py @@ -78,7 +78,7 @@ def pdf_weights( ) # check for the correct amount of weights - n_weights = ak.num(events.LHEPdfWeight, axis=1) + n_weights = ak.num(ak.drop_none(ak.nan_to_none(events.LHEPdfWeight)), axis=1) bad_mask = (n_weights != 101) & (n_weights != 103) # write ones in case there are no weights at all diff --git a/columnflow/production/cms/scale.py b/columnflow/production/cms/scale.py index a920534e2..f1a91bdb9 100644 --- a/columnflow/production/cms/scale.py +++ b/columnflow/production/cms/scale.py @@ -86,7 +86,7 @@ def murmuf_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: Resources: - https://cms-nanoaod-integration.web.cern.ch/integration/master/mc94X_doc.html """ - n_weights = ak.num(events.LHEScaleWeight, axis=1) + n_weights = ak.num(ak.drop_none(ak.nan_to_none(events.LHEScaleWeight)), axis=1) # in rare cases, some events might have 0 weights non_zero_mask = n_weights > 0 @@ -177,7 +177,9 @@ def murmuf_envelope_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Ar Resources: - https://cms-nanoaod-integration.web.cern.ch/integration/master/mc94X_doc.html """ - n_weights = ak.num(events.LHEScaleWeight, axis=1) + + # remove nan values in LHEScaleWeight columns for checking number of available weights + n_weights = ak.num(ak.drop_none(ak.nan_to_none(events.LHEScaleWeight)), axis=1) # in rare cases, some events might have 0 weights 
non_zero_mask = n_weights > 0 From 396d48456f07497c793cf653b3f5d2078b5258d1 Mon Sep 17 00:00:00 2001 From: juvanden Date: Thu, 6 Feb 2025 16:17:14 +0100 Subject: [PATCH 2/3] fixes to pdf and scale variations. Now when an event has NaN values in the weights, the weights are set to zero. --- columnflow/production/cms/pdf.py | 29 ++++++++++++++++++++++++++- columnflow/production/cms/scale.py | 32 +++++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/columnflow/production/cms/pdf.py b/columnflow/production/cms/pdf.py index de94651b8..756fade53 100644 --- a/columnflow/production/cms/pdf.py +++ b/columnflow/production/cms/pdf.py @@ -95,7 +95,7 @@ def pdf_weights( frac = ak.sum(bad_mask) / len(events) * 100 logger.warning( "the number of LHEPdfWeights is expected to be 101 or 103, but also found values " - f"'{bad_values}' in dataset {self.dataset_inst.name}, will set pdf weights to 1 for " + f"'{bad_values}' in dataset {self.dataset_inst.name}, will set pdf weights to 0 for " f"these events ({frac:.2f}%)", ) @@ -151,6 +151,33 @@ def pdf_weights( }[outlier_log_mode] msg_func(msg) + if ak.any(bad_mask) & ~ak.all(bad_mask): + # catch events where the number of weights is unexpected + occurances = ak.sum(bad_mask) + frac = occurances / len(stddev) * 100 + msg = ( + f"in dataset {self.dataset_inst.name}, there are {occurances} ({frac:.2f}%) " + "events where the number of (non Nan) weights is unexpected" + ) + + if outlier_action == "remove": + # set all pdf weights to 0 when the *outlier_threshold* is passed + events = set_ak_column_f32(events, "pdf_weight", ak.where(bad_mask, 0, events.pdf_weight)) + events = set_ak_column_f32(events, "pdf_weight_up", ak.where(bad_mask, 0, events.pdf_weight_up)) + events = set_ak_column_f32(events, "pdf_weight_down", ak.where(bad_mask, 0, events.pdf_weight_down)) + + msg += "; the nominal/up/down pdf_weight columns have been set to 0 for these events" + elif outlier_action == "raise": + raise 
Exception(msg) + + msg_func = { + "none": lambda msg: None, + "info": logger.info, + "warning": logger.warning, + "debug": logger.debug, + }[outlier_log_mode] + msg_func(msg) + invalid_pdf_weight = (pdf_weight_nominal == 0) if ak.any(invalid_pdf_weight): # set all pdf weights to 0 when the nominal pdf weight is 0 diff --git a/columnflow/production/cms/scale.py b/columnflow/production/cms/scale.py index f1a91bdb9..0bc3f2242 100644 --- a/columnflow/production/cms/scale.py +++ b/columnflow/production/cms/scale.py @@ -86,7 +86,7 @@ def murmuf_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: Resources: - https://cms-nanoaod-integration.web.cern.ch/integration/master/mc94X_doc.html """ - n_weights = ak.num(ak.drop_none(ak.nan_to_none(events.LHEScaleWeight)), axis=1) + n_weights = ak.num(events.LHEScaleWeight, axis=1) # in rare cases, some events might have 0 weights non_zero_mask = n_weights > 0 @@ -154,6 +154,21 @@ def murmuf_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: weights[non_zero_mask] = murf_weights[:, indices[index_name]] events = set_ak_column_f32(events, column, weights) + # check if LHEScaleWeight is Nan in specific events and set weights to 0 + has_nan_values = ak.all(ak.is_none(ak.nan_to_none(events.LHEScaleWeight), axis=-1), axis=-1) + if ak.any(has_nan_values): + logger.warning( + f"All values of LHEScaleWeights are Nan in {ak.sum(has_nan_values)} events" + r"Saving zeros for '{murmuf,mur,muf}_weight' of these events", + ) + for postfix in ["", "_up", "_down"]: + events = set_ak_column_f32(events, f"murmuf_weight{postfix}", ak.where( + has_nan_values, 0, events[f"murmuf_weight{postfix}"])) + events = set_ak_column_f32(events, f"mur_weight{postfix}", ak.where( + has_nan_values, 0, events[f"mur_weight{postfix}"])) + events = set_ak_column_f32(events, f"muf_weight{postfix}", ak.where( + has_nan_values, 0, events[f"muf_weight{postfix}"])) + return events @@ -230,6 +245,21 @@ def murmuf_envelope_weights(self: Producer, 
events: ak.Array, **kwargs) -> ak.Ar events = set_ak_column_f32(events, "murmuf_envelope_weight_down", ak.min(murf_weights, axis=1)) events = set_ak_column_f32(events, "murmuf_envelope_weight_up", ak.max(murf_weights, axis=1)) + # check if LHEScaleWeight is Nan in specific events and set weights to 0 + has_nan_values = ak.all(ak.is_none(ak.nan_to_none(events.LHEScaleWeight), axis=-1), axis=-1) + if ak.any(has_nan_values): + logger.warning( + f"All values of LHEScaleWeights are Nan in {ak.sum(has_nan_values)} events" + r"Saving zeros for '{murmuf,mur,muf}_weight' of these events", + ) + for postfix in ["", "_up", "_down"]: + events = set_ak_column_f32(events, "murmuf_envelope_weight", ak.where( + has_nan_values, 0, events.murmuf_envelope_weight)) + events = set_ak_column_f32(events, "murmuf_envelope_weight_down", ak.where( + has_nan_values, 0, events.murmuf_envelope_weight_down)) + events = set_ak_column_f32(events, "murmuf_envelope_weight_up", ak.where( + has_nan_values, 0, events.murmuf_envelope_weight_up)) + return events From bd408cb9ded722f6f504d13dbc165e8a0c7f414b Mon Sep 17 00:00:00 2001 From: juvanden Date: Thu, 6 Feb 2025 16:29:15 +0100 Subject: [PATCH 3/3] small bug fix in murmuf_envelope_weights --- columnflow/production/cms/scale.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/columnflow/production/cms/scale.py b/columnflow/production/cms/scale.py index 0bc3f2242..43f7e7cae 100644 --- a/columnflow/production/cms/scale.py +++ b/columnflow/production/cms/scale.py @@ -158,7 +158,7 @@ def murmuf_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: has_nan_values = ak.all(ak.is_none(ak.nan_to_none(events.LHEScaleWeight), axis=-1), axis=-1) if ak.any(has_nan_values): logger.warning( - f"All values of LHEScaleWeights are Nan in {ak.sum(has_nan_values)} events" + f"All values of LHEScaleWeights are Nan in {ak.sum(has_nan_values)} events. 
" r"Saving zeros for '{murmuf,mur,muf}_weight' of these events", ) for postfix in ["", "_up", "_down"]: @@ -194,7 +194,7 @@ def murmuf_envelope_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Ar """ # remove nan values in LHEScaleWeight columns for checking number of available weights - n_weights = ak.num(ak.drop_none(ak.nan_to_none(events.LHEScaleWeight)), axis=1) + n_weights = ak.num(events.LHEScaleWeight, axis=1) # in rare cases, some events might have 0 weights non_zero_mask = n_weights > 0 @@ -245,11 +245,11 @@ def murmuf_envelope_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Ar events = set_ak_column_f32(events, "murmuf_envelope_weight_down", ak.min(murf_weights, axis=1)) events = set_ak_column_f32(events, "murmuf_envelope_weight_up", ak.max(murf_weights, axis=1)) - # check if LHEScaleWeight is Nan in specific events and set weights to 0 + # check if LHEScaleWeights are Nan in specific events and set weights to 0 has_nan_values = ak.all(ak.is_none(ak.nan_to_none(events.LHEScaleWeight), axis=-1), axis=-1) if ak.any(has_nan_values): logger.warning( - f"All values of LHEScaleWeights are Nan in {ak.sum(has_nan_values)} events" + f"All values of LHEScaleWeights are Nan in {ak.sum(has_nan_values)} events. " r"Saving zeros for '{murmuf,mur,muf}_weight' of these events", ) for postfix in ["", "_up", "_down"]: