diff --git a/.gitmodules b/.gitmodules
index 6dc2212dd..cc55a2ef4 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,7 +1,7 @@
[submodule "modules/law"]
path = modules/law
- url = ../../riga/law.git
+ url = https://github.com/riga/law.git
[submodule "modules/order"]
path = modules/order
- url = ../../riga/order.git
+ url = https://github.com/riga/order.git
diff --git a/README.md b/README.md
index e5e6fb4ce..738e51a2a 100644
--- a/README.md
+++ b/README.md
@@ -33,25 +33,17 @@
Backend for columnar, fully orchestrated HEP analyses with pure Python, [law](https://github.com/riga/law) and [order](https://github.com/riga/order).
-Original source hosted at [GitHub](https://github.com/columnflow/columnflow).
+This project is for use within the Ghent CMS group. The original source is hosted at [GitHub](https://github.com/columnflow/columnflow).
-## Note on current development
-
-This project is currently in a beta phase.
-The project setup, suggested workflows, definitions of particular tasks, and the signatures of various helper classes and functions are mostly frozen but could still be subject to changes in the near future.
-At this point (July 2023), four large-scale analyses based upon columnflow are being developed, and in the process, help test and verify various aspects of its core.
-The first released version is expected in the fall of 2023.
-However, if you would like to join early on, contribute or just give it a spin, feel free to get in touch!
-
-
+
@@ -59,11 +51,12 @@ However, if you would like to join early on, contribute or just give it a spin,
## Quickstart
-To create an analysis using columnflow, it is recommended to start from a predefined template (located in [analysis_templates](https://github.com/columnflow/columnflow/tree/master/analysis_templates)).
+To create an analysis using columnflow, it is recommended to start from a predefined template (located in [analysis_templates](https://github.com/GhentAnalysis/columnflow/tree/main/analysis_templates)).
The following command (no previous git clone required) interactively asks for a handful of names and settings, and creates a minimal, yet fully functioning project structure for you!
+The 'cms_minimal' flavor corresponds to the template provided by columnflow itself, while 'ghent_template' provides a more extensive example.
```shell
-bash -c "$(curl -Ls https://raw.githubusercontent.com/columnflow/columnflow/master/create_analysis.sh)"
+bash -c "$(curl -Ls https://raw.githubusercontent.com/GhentAnalysis/columnflow/main/create_analysis.sh)"
```
At the end of the setup, you will see further instructions and suggestions to run your first analysis tasks (example below).
@@ -86,7 +79,7 @@ Setup successfull! The next steps are:
Suggestions for tasks to run:
- a) Run the 'calibration -> selection -> reduction' pipeline for the first file of the
+ a) Run the 'calibration -> selection -> reduction' pipeline for the first file (--branch 0) of the
default dataset using the default calibrator and default selector
(enter the command below and 'tab-tab' to see all arguments or add --help for help)
> law run cf.ReduceEvents --version dev1 --branch 0
@@ -108,52 +101,14 @@ Setup successfull! The next steps are:
For a better overview of the tasks that are triggered by the commands below, checkout the current (yet stylized) [task graph](https://github.com/columnflow/columnflow/wiki#default-task-graph).
-## Projects using columnflow
-
-- [hh2bbtautau](https://github.com/uhh-cms/hh2bbtautau): HH → bb𝜏𝜏 analysis with CMS.
-- [hh2bbww](https://github.com/uhh-cms/hh2bbww): HH → bbWW analysis with CMS.
-- [topmass](https://github.com/uhh-cms/topmass): Top quark mass measurement with CMS.
-- [mttbar](https://github.com/uhh-cms/mttbar): Search for heavy resonances in ttbar events with CMS.
-- [analysis playground](https://github.com/uhh-cms/analysis_playground): A testing playground for HEP analyses.
-
-
-## Contributors
-
-
-
-
-
-
-
-
-
-
-
-This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification.
+## Other projects and original developers
+You can find a list of other projects using columnflow in the [original GitHub repository](https://github.com/columnflow/columnflow).
+The main contributors to columnflow are also listed there.
## Development
- Source hosted at [GitHub](https://github.com/columnflow/columnflow)
-- Report issues, questions, feature requests on [GitHub Issues](https://github.com/columnflow/columnflow/issues)
+- Report issues, questions, and feature requests for columnflow on [GitHub Issues](https://github.com/columnflow/columnflow/issues)
diff --git a/analysis_templates/cms_minimal/law.cfg b/analysis_templates/cms_minimal/law.cfg
index 764843759..0dd6ff0ce 100644
--- a/analysis_templates/cms_minimal/law.cfg
+++ b/analysis_templates/cms_minimal/law.cfg
@@ -67,11 +67,11 @@ log_array_function_runtime: False
[outputs]
# list of all used file systems
-wlcg_file_systems: wlcg_fs, wlcg_fs_infn_redirector, wlcg_fs_global_redirector
+wlcg_file_systems: wlcg_fs_t2b_redirector, wlcg_fs, wlcg_fs_infn_redirector, wlcg_fs_global_redirector
# list of file systems used by columnflow.tasks.external.GetDatasetLFNs.iter_nano_files to
# look for the correct fs per nano input file (in that order)
-lfn_sources: wlcg_fs_infn_redirector, wlcg_fs_global_redirector
+lfn_sources: wlcg_fs_t2b_redirector, wlcg_fs_infn_redirector, wlcg_fs_global_redirector
# output locations per task family
# for local targets : "local[, LOCAL_FS_NAME or STORE_PATH]"
@@ -101,10 +101,22 @@ remote_lcg_setup: /cvmfs/grid.cern.ch/centos7-ui-200122/etc/profile.d/setup-c7-u
base: /
+[wlcg_fs_t2b_redirector]
+
+# set this to your desired location
+base: /pnfs/iihe/cms/ph/sc4
+use_cache: $CF_WLCG_USE_CACHE
+cache_root: $CF_WLCG_CACHE_ROOT
+cache_cleanup: $CF_WLCG_CACHE_CLEANUP
+cache_max_size: 15GB
+cache_global_lock: True
+cache_mtime_patience: -1
+
+
[wlcg_fs]
# set this to your desired location
-base: root://eosuser.cern.ch/eos/user/$CF_CERN_USER_FIRSTCHAR/$CF_CERN_USER/$CF_STORE_NAME
+base: root://eosuser.cern.ch//eos/cms
create_file_dir: True
use_cache: $CF_WLCG_USE_CACHE
cache_root: $CF_WLCG_CACHE_ROOT
diff --git a/analysis_templates/ghent_template/.flake8 b/analysis_templates/ghent_template/.flake8
new file mode 100644
index 000000000..b3b274697
--- /dev/null
+++ b/analysis_templates/ghent_template/.flake8
@@ -0,0 +1,10 @@
+[flake8]
+
+# line length of 100 is recommended, but set it to a forgiving value
+max-line-length = 120
+
+# codes of errors to ignore
+ignore = E128, E306, E402, E722, E731, W504, Q003
+
+# enforce double quotes
+inline-quotes = double
diff --git a/analysis_templates/ghent_template/.gitattributes b/analysis_templates/ghent_template/.gitattributes
new file mode 100644
index 000000000..0461245fd
--- /dev/null
+++ b/analysis_templates/ghent_template/.gitattributes
@@ -0,0 +1,5 @@
+*.pdf filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.root filter=lfs diff=lfs merge=lfs -text
diff --git a/analysis_templates/ghent_template/.gitignore b/analysis_templates/ghent_template/.gitignore
new file mode 100644
index 000000000..e6043753d
--- /dev/null
+++ b/analysis_templates/ghent_template/.gitignore
@@ -0,0 +1,38 @@
+*.sublime-project
+*.sublime-workspace
+*.pyc
+*.log
+*.DS_Store
+*.egg-info
+*.pkl
+*.pdf
+*.png
+*.root
+*.npy
+*.npz
+*.h5
+*.hdf5
+*.json
+*.yaml
+*.pb
+*.out
+*.parquet
+.env_*.sh
+.env_*.sh.tmp
+.coverage
+coverage*.xml
+requirements_user.txt
+__pycache__
+dist
+build
+static
+docs/_build
+tmp
+store
+software
+data
+.data
+.law
+.setups
+.mypy_cache
+.vscode
diff --git a/analysis_templates/ghent_template/LICENSE b/analysis_templates/ghent_template/LICENSE
new file mode 100644
index 000000000..f288702d2
--- /dev/null
+++ b/analysis_templates/ghent_template/LICENSE
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/analysis_templates/ghent_template/README.md b/analysis_templates/ghent_template/README.md
new file mode 100644
index 000000000..85fe25852
--- /dev/null
+++ b/analysis_templates/ghent_template/README.md
@@ -0,0 +1,113 @@
+# __cf_analysis_name__ Analysis
+
+# Datasets and processes
+
+All processes and datasets are defined in the [cmsdb gitlab](https://gitlab.cern.ch/ghentanalysis/cmsdb/-/blob/master/cmsdb/).
+Processes are added to the analysis in the file [config/processes.py](__cf_module_name__/config/processes.py).
+Data- and MC-sets are added to the analysis in the file [config/datasets.py](__cf_module_name__/config/datasets.py).
+Note the difference between a process and a dataset. A process can correspond to multiple datasets. The other way around is currently not possible.
+
+The datasets (for ERA=a,b,c,d) are:
+- data_mu_ERA corresponding to SingleMuon
+- data_mumu_ERA corresponding to DoubleMuon
+- data_muoneg_ERA corresponding to MuonEG
+- data_egamma_ERA corresponding to EGamma
+
+The MC processes with corresponding datasets are
+- ttbar with corresponding datasets tt_sl_powheg and tt_dl_powheg
+- dy (Drell-Yan) with corresponding datasets dy_lept_m50_ht-RANGE_madgraph
+
+The analysis can be run over only selected datasets using the --datasets argument. Groupings of datasets are defined in [config/datasets.py](__cf_module_name__/config/config___cf_short_name_lc__.py). A similar scheme exists for processes.
+
+
+# Object Definition
+
+All objects collected in [selection/objects.py:object_selection](__cf_module_name__/selection/objects.py#L177).
+
+## Muons
+
+Defined in [selection/objects.py:muon_object](__cf_module_name__/selection/objects.py#L36).
+
+- $|\eta| < 2.4$
+- $p_T > 10$
+- $\texttt{miniPFRelIso all} < 0.4$
+- $\texttt{sip3d} < 8$
+- $d_{xy} < 0.05$
+- $d_z < 0.1$
+
+Tight muons are additionally defined by:
+- $\texttt{tightId}$
+
+## Electrons
+
+Defined in [selection/objects.py:electron_object](__cf_module_name__/selection/objects.py#L83).
+
+- $|\eta| < 2.5$
+- $p_T > 15$
+- $\texttt{miniPFRelIso all} < 0.4$
+- $\texttt{sip3d} < 8$
+- $d_{xy} < 0.05$
+- $d_z < 0.1$
+- at most one lost hit
+- is a PF candidate
+- with conversion veto applied
+- $\texttt{tightCharge} > 1$
+- without a muon close by ($\Delta R < 0.05$)
+
+## Jets
+
+Defined in [selection/objects.py:jet_object](__cf_module_name__/selection/objects.py#L132).
+
+- ak4 Jets (standard Jet collection in NanoAOD)
+- $|\eta| < 2.5$
+- $p_T > 30$
+- $\texttt{jetId} \ge 2$
+- not containing a muon or electron ($\Delta R < 0.4$)
+
+
+# Calibration
+
+Currently only the JEC and JER corrections are implemented. Two procedures are defined:
+- Full JEC uncertainties, no JER: [calibration/default.py:default](__cf_module_name__/calibration/default.py#L21).
+- Only nominal JEC, but also JER: [calibration/default.py:skip_jecunc](__cf_module_name__/calibration/default.py#L50).
+
+The applied procedure can be specified at
+[config/config___cf_short_name_lc__.py:cfg.x.default_calibrator](__cf_module_name__/config/config___cf_short_name_lc__.py#L339).
+
+
+# Event selection
+
+The aim is to select $t\overline{t}$ events.
+Full default selection flow collected in [selection/default.py:default](__cf_module_name__/selection/default.py#L213).
+Different selections can be defined by writing a similar function, and changing the configuration at [config/config___cf_short_name_lc__.py:cfg.x.default_selector](__cf_module_name__/config/config___cf_short_name_lc__.py#L340).
+
+
+- triggers applied in [selection/trigger.py:default](__cf_module_name__/selection/trigger.py#L57)
+ - listed in [selection/trigger.py:add_triggers](__cf_module_name__/selection/trigger.py#L11)
+- lepton selection applied in [selection/default.py:lepton_selection](__cf_module_name__/selection/default.py#L81).
+ - remove Z resonance (same flavour, opposite sign, $|m_{\ell\ell} - 91| < 15$)
+ - leading lepton $p_T > 30$
+ - subleading lepton $p_T > 20$
+ - all leptons in the event should be tight
+- jet selection applied in [selection/default.py:jet_selection](__cf_module_name__/selection/default.py#L136).
+ - one b-tagged jet
+
+Note that selections are calculated as masks but not yet applied.
+
+# Categories / channels
+
+Four channels are defined in the configuration file, described in [config/categories.py](__cf_module_name__/config/categories.py) and implemented in [categorization/example.py](__cf_module_name__/categorization/example.py).
+
+- $ee$ [selection/categories.py:catid_selection_2e](__cf_module_name__/selection/categories.py#L24)
+- $e\mu$ [selection/categories.py:catid_selection_1e1mu](__cf_module_name__/selection/categories.py#L33)
+- $\mu\mu$ [selection/categories.py:catid_selection_2mu](__cf_module_name__/selection/categories.py#L42)
+- inclusive [selection/categories.py:catid_selection_incl](__cf_module_name__/selection/categories.py#L14)
+
+
+# Resources
+
+- [columnflow](https://github.com/uhh-cms/columnflow)
+- [law](https://github.com/riga/law)
+- [order](https://github.com/riga/order)
+- [luigi](https://github.com/spotify/luigi)
+
diff --git a/analysis_templates/ghent_template/__cf_module_name__/__init__.py b/analysis_templates/ghent_template/__cf_module_name__/__init__.py
new file mode 100644
index 000000000..32763ecb7
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/__init__.py
@@ -0,0 +1,8 @@
+# coding: utf-8
+
+
+from __cf_module_name__.columnflow_patches import patch_all
+
+
+# apply the columnflow patches when this package is imported (patch_all is wrapped by @memoize)
+patch_all()
diff --git a/analysis_templates/ghent_template/__cf_module_name__/analysis/__cf_short_name_lc__.py b/analysis_templates/ghent_template/__cf_module_name__/analysis/__cf_short_name_lc__.py
new file mode 100644
index 000000000..b619736c7
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/analysis/__cf_short_name_lc__.py
@@ -0,0 +1,8 @@
+
+"""
+Main analysis object for the __cf_short_name_lc__ analysis
+"""
+
+from __cf_short_name_lc__.analysis.create_analysis import create_analysis
+
+__cf_short_name_lc__ = create_analysis("__cf_short_name_lc__", 3, tags={"is_signal_region"})  # module-level instance, built on import
diff --git a/analysis_templates/ghent_template/__cf_module_name__/analysis/create_analysis.py b/analysis_templates/ghent_template/__cf_module_name__/analysis/create_analysis.py
new file mode 100644
index 000000000..27715fbcb
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/analysis/create_analysis.py
@@ -0,0 +1,77 @@
+# coding: utf-8
+
+"""
+Configuration of the __cf_short_name_lc__ analysis.
+"""
+
+import os
+
+import law
+import order as od
+
+
+thisdir = os.path.dirname(os.path.abspath(__file__))
+
+
+def create_analysis(
+ name,
+ id,
+ **kwargs,
+) -> od.Analysis:
+
+ #
+ # the main analysis object
+ #
+
+ analysis_inst = od.Analysis(
+ name=name,
+ id=id,
+ **kwargs,
+ )
+
+ # analysis-global versions
+ analysis_inst.set_aux("versions", {
+ })
+
+ # files of sandboxes that might be required by remote tasks
+ # (used in cf.HTCondorWorkflow)
+ analysis_inst.x.bash_sandboxes = [
+ "$CF_BASE/sandboxes/cf.sh",
+ ]
+ default_sandbox = law.Sandbox.new(law.config.get("analysis", "default_columnar_sandbox"))
+ if default_sandbox.sandbox_type == "bash" and default_sandbox.name not in analysis_inst.x.bash_sandboxes:
+ analysis_inst.x.bash_sandboxes.append(default_sandbox.name)
+ # cmssw sandboxes that should be bundled for remote jobs in case they are needed
+ analysis_inst.x.cmssw_sandboxes = [
+ "$CF_BASE/sandboxes/cmssw_default.sh",
+ ]
+
+ # config groups for conveniently looping over certain configs
+ # (used in wrapper_factory)
+ analysis_inst.set_aux("config_groups", {})
+
+ #
+ # import campaigns and load configs
+ #
+
+ from __cf_short_name_lc__.config.config___cf_short_name_lc__ import add_config
+ from cmsdb.campaigns.run2_2018_nano_v9 import campaign_run2_2018_nano_v9
+
+ # default config
+ c18 = add_config( # noqa
+ analysis_inst,
+ campaign_run2_2018_nano_v9.copy(),
+ config_name="c18",
+ config_id=2,
+ )
+
+ # config with limited number of files
+ l18 = add_config( # noqa
+ analysis_inst,
+ campaign_run2_2018_nano_v9.copy(),
+ config_name="l18",
+ config_id=12,
+ limit_dataset_files=2,
+ )
+
+ return analysis_inst
diff --git a/analysis_templates/ghent_template/__cf_module_name__/calibration/__init__.py b/analysis_templates/ghent_template/__cf_module_name__/calibration/__init__.py
new file mode 100644
index 000000000..57d631c3f
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/calibration/__init__.py
@@ -0,0 +1 @@
+# coding: utf-8
diff --git a/analysis_templates/ghent_template/__cf_module_name__/calibration/default.py b/analysis_templates/ghent_template/__cf_module_name__/calibration/default.py
new file mode 100644
index 000000000..7854b3f78
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/calibration/default.py
@@ -0,0 +1,77 @@
+# coding: utf-8
+
+"""
+Calibration methods.
+"""
+
+from columnflow.calibration import Calibrator, calibrator
+from columnflow.calibration.cms.jets import jec, jer, jec_nominal
+from columnflow.production.cms.seeds import deterministic_seeds
+from columnflow.util import maybe_import
+
+ak = maybe_import("awkward")
+
+
+@calibrator(
+ uses={deterministic_seeds},
+ produces={deterministic_seeds},
+)
+def default(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array:
+ events = self[deterministic_seeds](events, **kwargs)
+
+ if self.dataset_inst.is_data:
+ events = self[jec_nominal](events, **kwargs)
+ else:
+ events = self[jec](events, **kwargs)
+
+ return events
+
+
+# NOTE:
+# the function together with its @default.init decorator allows to customise the initialization of the
+# calibration function as performed by the @calibrator decorator. Here, we extend the uses={...} and produces={...}
+# sets dynamically, because what is used and produced depends on whether we are processing MC or data.
+@default.init
+def default_init(self: Calibrator) -> None:
+ if not getattr(self, "dataset_inst", None):
+ return
+
+ if self.dataset_inst.is_data:
+ calibrators = {jec_nominal}
+ else:
+ calibrators = {jec}
+
+ self.uses |= calibrators
+ self.produces |= calibrators
+
+
+@calibrator(
+ uses={deterministic_seeds},
+ produces={deterministic_seeds},
+)
+def skip_jecunc(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array:
+ """ only uses jec_nominal for test purposes """
+ events = self[deterministic_seeds](events, **kwargs)
+
+ if self.dataset_inst.is_data:
+ events = self[jec_nominal](events, **kwargs)
+ else:
+ events = self[jec_nominal](events, **kwargs)
+ events = self[jer](events, **kwargs)
+
+ return events
+
+
+# NOTE: see default_init
+@skip_jecunc.init
+def skip_jecunc_init(self: Calibrator) -> None:
+ if not getattr(self, "dataset_inst", None):
+ return
+
+ if self.dataset_inst.is_data:
+ calibrators = {jec_nominal}
+ else:
+ calibrators = {jec_nominal, jer}
+
+ self.uses |= calibrators
+ self.produces |= calibrators
diff --git a/analysis_templates/ghent_template/__cf_module_name__/categorization/__init__.py b/analysis_templates/ghent_template/__cf_module_name__/categorization/__init__.py
new file mode 100644
index 000000000..57d631c3f
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/categorization/__init__.py
@@ -0,0 +1 @@
+# coding: utf-8
diff --git a/analysis_templates/ghent_template/__cf_module_name__/categorization/example.py b/analysis_templates/ghent_template/__cf_module_name__/categorization/example.py
new file mode 100644
index 000000000..0ae64c0a0
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/categorization/example.py
@@ -0,0 +1,26 @@
+# coding: utf-8
+
+"""
+Exemplary categorization methods.
+"""
+
+from columnflow.categorization import Categorizer, categorizer
+from columnflow.util import maybe_import
+
+ak = maybe_import("awkward")
+
+
+#
+# categorizer functions used by categories definitions
+#
+
+@categorizer(uses={"event"})
+def cat_incl(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]:
+ # fully inclusive selection
+ return events, ak.ones_like(events.event) == 1
+
+
+@categorizer(uses={"Jet.pt"})
+def cat_2j(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]:
+ # two or more jets
+ return events, ak.num(events.Jet.pt, axis=1) >= 2
diff --git a/analysis_templates/ghent_template/__cf_module_name__/columnflow_patches.py b/analysis_templates/ghent_template/__cf_module_name__/columnflow_patches.py
new file mode 100644
index 000000000..bea1a91e3
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/columnflow_patches.py
@@ -0,0 +1,52 @@
+# coding: utf-8
+
+"""
+Collection of patches of underlying columnflow tasks.
+"""
+
+import os
+
+import law
+from columnflow.util import memoize
+
+
+logger = law.logger.get_logger(__name__)
+
+
+@memoize
+def patch_bundle_repo_exclude_files():
+ from columnflow.tasks.framework.remote import BundleRepo
+
+ # get the relative path to CF_BASE
+ cf_rel = os.path.relpath(os.environ["CF_BASE"], os.environ["__cf_short_name_uc___BASE"])
+
+ # amend exclude files to start with the relative path to CF_BASE
+ exclude_files = [os.path.join(cf_rel, path) for path in BundleRepo.exclude_files]
+
+ # add additional files
+ exclude_files.extend([
+ "docs", "tests", "data", "assets", ".law", ".setups", ".data", ".github",
+ ])
+
+ # overwrite them
+ BundleRepo.exclude_files[:] = exclude_files
+
+ logger.debug("patched exclude_files of cf.BundleRepo")
+
+
+@memoize
+def patch_htcondor_workflow():
+ from columnflow.tasks.framework.remote import HTCondorWorkflow
+
+ # change the max_runtime parameter default
+ HTCondorWorkflow.max_runtime._default = 0
+ logger.debug("patched max_runtime of cf.HTCondorWorkflow")
+
+ HTCondorWorkflow.htcondor_flavor._default = 'NO_STR'
+ logger.debug("patched flavor of cf.HTCondorWorkflow")
+
+
+@memoize
+def patch_all():
+ patch_bundle_repo_exclude_files()
+ patch_htcondor_workflow()
diff --git a/analysis_templates/ghent_template/__cf_module_name__/config/__init__.py b/analysis_templates/ghent_template/__cf_module_name__/config/__init__.py
new file mode 100644
index 000000000..57d631c3f
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/config/__init__.py
@@ -0,0 +1 @@
+# coding: utf-8
diff --git a/analysis_templates/ghent_template/__cf_module_name__/config/categories.py b/analysis_templates/ghent_template/__cf_module_name__/config/categories.py
new file mode 100644
index 000000000..6e3649f02
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/config/categories.py
@@ -0,0 +1,59 @@
+# coding: utf-8
+
+"""
+Definition of categories.
+"""
+
+from collections import OrderedDict
+
+import law
+
+from columnflow.config_util import create_category_combinations
+from columnflow.util import call_once_on_config
+
+import order as od
+
+logger = law.logger.get_logger(__name__)
+
+
+@call_once_on_config()
+def add_categories_selection(config: od.Config) -> None:
+ """
+ Adds categories to a *config*, that are typically produced in `SelectEvents`.
+ """
+
+ config.x.regions = ("incl")
+ config.x.lepton_channels = ("2e", "1e1mu", "2mu")
+ config.x.lepton_channel_labels = {"2e": "$ee$", "1e1mu": "$e\mu$", "2mu": "$\mu\mu$"}
+
+ config.add_category(
+ name="incl",
+ id=1,
+ selection="catid_selection_incl",
+ label="Inclusive",
+ )
+
+ # add lepton categories defined in ___cf_short_name_lc__.selection.categories to the config
+ for lepton_channel in config.x.lepton_channels:
+
+ config.add_category(
+ name=lepton_channel,
+ selection=["catid_selection_{}".format(lepton_channel)],
+ label=config.x.lepton_channel_labels[lepton_channel],
+ )
+
+
+@call_once_on_config()
+def add_categories_production(config: od.Config) -> None:
+ """
+ Adds categories to a *config*, that are typically produced in `ProduceColumns`.
+ """
+
+ #
+ # switch existing categories to different production module
+ #
+
+ for lepton_channel in config.x.lepton_channels:
+
+ cat_lepton = config.get_category(lepton_channel)
+ cat_lepton.selection = ["catid_{}".format(lepton_channel)]
diff --git a/analysis_templates/ghent_template/__cf_module_name__/config/config___cf_short_name_lc__.py b/analysis_templates/ghent_template/__cf_module_name__/config/config___cf_short_name_lc__.py
new file mode 100644
index 000000000..831b24263
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/config/config___cf_short_name_lc__.py
@@ -0,0 +1,352 @@
+# coding: utf-8
+
+"""
+Configuration of the __cf_short_name_lc__ analysis.
+"""
+from __future__ import annotations
+
+import order as od
+from scinum import Number
+
+from columnflow.util import DotDict, maybe_import, four_vec
+from columnflow.config_util import (
+ verify_config_processes,
+)
+
+from __cf_short_name_lc__.config.styling import stylize_processes
+from __cf_short_name_lc__.config.datasets import add_datasets, configure_datasets
+from __cf_short_name_lc__.config.processes import add_processes
+from __cf_short_name_lc__.config.veto import add_vetoes
+from __cf_short_name_lc__.config.categories import add_categories_selection
+from __cf_short_name_lc__.config.variables import add_variables
+from __cf_short_name_lc__.config.shifts import add_shifts
+from __cf_short_name_lc__.selection.trigger import add_triggers
+
+ak = maybe_import("awkward")
+
+
+def add_config(
+ analysis: od.Analysis,
+ campaign: od.Campaign,
+ config_name: str | None = None,
+ config_id: int | None = None,
+ limit_dataset_files: int | None = None,
+) -> od.Config:
+ # validations
+ year = campaign.x.year
+ assert year in [2016, 2017, 2018] # only run 2 implemented
+ if year == 2016:
+ assert campaign.x.vfp in ["pre", "post"]
+
+ # only 2018 fully implemented
+ if year != 2018:
+ raise NotImplementedError("For now, only 2018 campaign is fully implemented")
+
+ cfg = analysis.add_config(campaign, name=config_name, id=config_id, tags=analysis.tags)
+
+ year2 = year % 100
+ corr_postfix = f"{campaign.x.vfp}VFP" if year == 2016 else ""
+ ecm = campaign.ecm
+
+ cfg.x.year = year
+ cfg.x.year2 = year2
+ cfg.x.corr_postfix = corr_postfix
+ cfg.x.ecm = ecm
+
+ add_processes(cfg, campaign)
+
+ add_triggers(cfg, campaign)
+ add_datasets(cfg, campaign)
+ add_vetoes(cfg)
+ configure_datasets(cfg, limit_dataset_files)
+
+ # verify that the root process of all datasets is part of any of the registered processes
+ verify_config_processes(cfg, warn=True)
+
+ # lumi values in inverse pb
+ # https://twiki.cern.ch/twiki/bin/view/CMS/LumiRecommendationsRun2?rev=2#Combination_and_correlations
+ if year == 2016:
+ cfg.x.luminosity = Number(36310, {
+ "lumi_13TeV_2016": 0.01j,
+ "lumi_13TeV_correlated": 0.006j,
+ })
+ elif year == 2017:
+ cfg.x.luminosity = Number(41480, {
+ "lumi_13TeV_2017": 0.02j,
+ "lumi_13TeV_1718": 0.006j,
+ "lumi_13TeV_correlated": 0.009j,
+ })
+ elif year == 2018: # 2018
+ cfg.x.luminosity = Number(59830, {
+ "lumi_13TeV_2017": 0.015j,
+ "lumi_13TeV_1718": 0.002j,
+ "lumi_13TeV_correlated": 0.02j,
+ })
+
+ cfg.x.minbias_xs = Number(69.2, 0.046j)
+
+ # jec configuration
+ # https://twiki.cern.ch/twiki/bin/view/CMS/JECDataMC?rev=201
+ jerc_postfix = "APV" if year == 2016 and campaign.x.vfp == "post" else ""
+ cfg.x.jec = DotDict.wrap({
+ "campaign": f"Summer19UL{year2}{jerc_postfix}",
+ "version": {2016: "V7", 2017: "V5", 2018: "V5"}[year],
+ "jet_type": "AK4PFchs",
+ "levels": ["L1FastJet", "L2Relative", "L2L3Residual", "L3Absolute"],
+ "levels_for_type1_met": ["L1FastJet"],
+ "uncertainty_sources": [
+ # "AbsoluteStat",
+ # "AbsoluteScale",
+ # "AbsoluteSample",
+ # "AbsoluteFlavMap",
+ # "AbsoluteMPFBias",
+ # "Fragmentation",
+ # "SinglePionECAL",
+ # "SinglePionHCAL",
+ # "FlavorQCD",
+ # "TimePtEta",
+ # "RelativeJEREC1",
+ # "RelativeJEREC2",
+ # "RelativeJERHF",
+ # "RelativePtBB",
+ # "RelativePtEC1",
+ # "RelativePtEC2",
+ # "RelativePtHF",
+ # "RelativeBal",
+ # "RelativeSample",
+ # "RelativeFSR",
+ # "RelativeStatFSR",
+ # "RelativeStatEC",
+ # "RelativeStatHF",
+ # "PileUpDataMC",
+ # "PileUpPtRef",
+ # "PileUpPtBB",
+ # "PileUpPtEC1",
+ # "PileUpPtEC2",
+ # "PileUpPtHF",
+ # "PileUpMuZero",
+ # "PileUpEnvelope",
+ # "SubTotalPileUp",
+ # "SubTotalRelative",
+ # "SubTotalPt",
+ # "SubTotalScale",
+ # "SubTotalAbsolute",
+ # "SubTotalMC",
+ "Total",
+ # "TotalNoFlavor",
+ # "TotalNoTime",
+ # "TotalNoFlavorNoTime",
+ # "FlavorZJet",
+ # "FlavorPhotonJet",
+ # "FlavorPureGluon",
+ # "FlavorPureQuark",
+ # "FlavorPureCharm",
+ # "FlavorPureBottom",
+ # "TimeRunA",
+ # "TimeRunB",
+ # "TimeRunC",
+ # "TimeRunD",
+ "CorrelationGroupMPFInSitu",
+ "CorrelationGroupIntercalibration",
+ "CorrelationGroupbJES",
+ "CorrelationGroupFlavor",
+ "CorrelationGroupUncorrelated",
+ ],
+ })
+
+ # JER
+ # https://twiki.cern.ch/twiki/bin/view/CMS/JetResolution?rev=107
+ cfg.x.jer = DotDict.wrap({
+ "campaign": f"Summer19UL{year2}{jerc_postfix}",
+ "version": "JR" + {2016: "V3", 2017: "V2", 2018: "V2"}[year],
+ "jet_type": "AK4PFchs",
+ })
+
+ # JEC uncertainty sources propagated to btag scale factors
+ # (names derived from contents in BTV correctionlib file)
+ cfg.x.btag_sf_jec_sources = [
+ "", # total
+ "Absolute",
+ "AbsoluteMPFBias",
+ "AbsoluteScale",
+ "AbsoluteStat",
+ f"Absolute_{year}",
+ "BBEC1",
+ f"BBEC1_{year}",
+ "EC2",
+ f"EC2_{year}",
+ "FlavorQCD",
+ "Fragmentation",
+ "HF",
+ f"HF_{year}",
+ "PileUpDataMC",
+ "PileUpPtBB",
+ "PileUpPtEC1",
+ "PileUpPtEC2",
+ "PileUpPtHF",
+ "PileUpPtRef",
+ "RelativeBal",
+ "RelativeFSR",
+ "RelativeJEREC1",
+ "RelativeJEREC2",
+ "RelativeJERHF",
+ "RelativePtBB",
+ "RelativePtEC1",
+ "RelativePtEC2",
+ "RelativePtHF",
+ "RelativeSample",
+ f"RelativeSample_{year}",
+ "RelativeStatEC",
+ "RelativeStatFSR",
+ "RelativeStatHF",
+ "SinglePionECAL",
+ "SinglePionHCAL",
+ "TimePtEta",
+ ]
+
+ # b-tag working points
+ # https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL16preVFP?rev=6
+ # https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL16postVFP?rev=8
+ # https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL17?rev=15
+ # https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL17?rev=17
+ btag_key = f"2016{campaign.x.vfp}" if year == 2016 else year
+ cfg.x.btag_working_points = DotDict.wrap({
+ "deepjet": {
+ "loose": {"2016pre": 0.0508, "2016post": 0.0480, 2017: 0.0532, 2018: 0.0490}[btag_key],
+ "medium": {"2016pre": 0.2598, "2016post": 0.2489, 2017: 0.3040, 2018: 0.2783}[btag_key],
+ "tight": {"2016pre": 0.6502, "2016post": 0.6377, 2017: 0.7476, 2018: 0.7100}[btag_key],
+ },
+ "deepcsv": {
+ "loose": {"2016pre": 0.2027, "2016post": 0.1918, 2017: 0.1355, 2018: 0.1208}[btag_key],
+ "medium": {"2016pre": 0.6001, "2016post": 0.5847, 2017: 0.4506, 2018: 0.4168}[btag_key],
+ "tight": {"2016pre": 0.8819, "2016post": 0.8767, 2017: 0.7738, 2018: 0.7665}[btag_key],
+ },
+ })
+ cfg.x.btag_sf = ("deepJet_shape", cfg.x.btag_sf_jec_sources)
+
+ # names of electron correction sets and working points
+ # (used in the electron_sf producer)
+ cfg.x.electron_sf_names = ("UL-Electron-ID-SF", f"{year}{corr_postfix}", "wp80iso")
+ cfg.x.muon_sf_names = ("NUM_TightRelIso_DEN_TightIDandIPCut", f"{year}{corr_postfix}_UL")
+
+ # external files
+ json_mirror = "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration"
+ year_short = str(year)[2:] # 20XX > XX
+ lumi_cert_site = f"https://cms-service-dqmdc.web.cern.ch/CAF/certification/Collisions{year_short}/{ecm:g}TeV"
+ pu_reweighting_site = f"{lumi_cert_site}/PileUp/UltraLegacy"
+ goldenjsons = {
+ 2016: f"Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt",
+ 2017: f"Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt",
+ 2018: f"Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt",
+ }
+ cfg.x.external_files = DotDict.wrap({
+ # lumi files (golden run 2 only!!)
+ "lumi": {
+ "golden": (f"{lumi_cert_site}/Legacy_{year}/{goldenjsons[year]}", "v1"),
+ "normtag": ("modules/Normtags/normtag_PHYSICS.json", "v1"),
+ },
+
+ # jet energy correction
+ "jet_jerc": (f"{json_mirror}/POG/JME/{year}{corr_postfix}_UL/jet_jerc.json.gz", "v1"),
+
+ # electron scale factors
+ "electron_sf": (f"{json_mirror}/POG/EGM/{year}{corr_postfix}_UL/electron.json.gz", "v1"),
+
+ # muon scale factors
+ "muon_sf": (f"{json_mirror}/POG/MUO/{year}{corr_postfix}_UL/muon_Z.json.gz", "v1"),
+
+ # btag scale factor
+ "btag_sf_corr": (f"{json_mirror}/POG/BTV/{year}{corr_postfix}_UL/btagging.json.gz", "v1"),
+
+ # Pile up scale factor
+ "pu_sf": (f"{json_mirror}/POG/LUM/{year}{corr_postfix}_UL/puWeights.json.gz", "v1")
+ })
+
+ # process groups for conveniently looping over certain processs
+ # (used in wrapper_factory and during plotting)
+ cfg.x.process_groups = {
+ "test": ["tt_dl"],
+ "all": ["tt_dl", "dy", "data"],
+ "sim": ["tt_dl", "dy"],
+ }
+
+ # dataset groups for conveniently looping over certain datasets
+ # (used in wrapper_factory and during plotting)
+ cfg.x.dataset_groups = {
+ "test": ["tt_dl_powheg"],
+ "all": ["tt_dl_powheg", "dy*", "data*"],
+ "sim": ["tt_dl_powheg", "dy*"],
+ }
+
+ cfg.x.variable_groups = {
+ "default": ["n_jet"],
+ }
+
+ # category groups for conveniently looping over certain categories
+ # (used during plotting)
+ cfg.x.category_groups = {
+ "default": ["incl"],
+ }
+
+ # shift groups for conveniently looping over certain shifts
+ # (used during plotting)
+ cfg.x.event_weights = DotDict()
+ cfg.x.event_weights["normalization_weight"] = []
+ add_shifts(cfg)
+
+ cfg.x.shift_groups = {
+ "jer": ["nominal", "jer_up", "jer_down"],
+ "btag": ["nominal", "btag*"],
+ "all": cfg.shifts.names(),
+ }
+
+ # selector step groups for conveniently looping over certain steps
+ # (used in cutflow tasks)
+ cfg.x.selector_step_groups = {}
+
+ # custom method and sandbox for determining dataset lfns
+ cfg.x.get_dataset_lfns = None
+ cfg.x.get_dataset_lfns_sandbox = None
+
+ # whether to validate the number of obtained LFNs in GetDatasetLFNs
+ # (currently set to false because the number of files per dataset is truncated to 2)
+ cfg.x.validate_dataset_lfns = False
+
+ # columns to keep after certain steps
+ cfg.x.keep_columns = DotDict.wrap({
+ "cf.MergeSelectionMasks": {
+ "mc_weight", "normalization_weight", "process_id", "category_ids", "cutflow.*",
+ },
+ })
+
+ cfg.x.keep_columns["cf.ReduceEvents"] = (
+ {
+ # general event information
+ "run", "luminosityBlock", "event",
+ # columns added during selection, required in general
+ "mc_weight", "PV.npvs", "process_id", "category_ids", "deterministic_seed",
+ # weight-related columns
+ "pu_weight*", "pdf_weight*",
+ "murf_envelope_weight*", "mur_weight*", "muf_weight*",
+ "btag_weight*",
+ # extra columns
+ } | four_vec( # Jets
+ {"Jet"},
+ {"btagDeepFlavB", "btagDeepFlavCvB"},
+ ) | four_vec( # Leptons
+ {"Electron", "Muon", }
+ )
+ )
+
+ cfg.x.default_calibrator = "skip_jecunc" # skip jet energy correction up and down variation to save time in running
+ cfg.x.default_selector = "default"
+ cfg.x.default_producer = "default"
+ cfg.x.default_ml_model = None
+ cfg.x.default_inference_model = "example"
+ cfg.x.default_variables = ("n_jet",)
+
+ add_categories_selection(cfg)
+ add_variables(cfg)
+ stylize_processes(cfg)
+
+ return cfg
diff --git a/analysis_templates/ghent_template/__cf_module_name__/config/datasets.py b/analysis_templates/ghent_template/__cf_module_name__/config/datasets.py
new file mode 100644
index 000000000..2e7b24627
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/config/datasets.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+
+"""
+Dataset configuration for the Ghent Columnflow analysis template (based on tt).
+"""
+
+from __future__ import annotations
+
+import law
+import order as od
+import cmsdb.processes as procs
+from columnflow.tasks.external import GetDatasetLFNs
+# import cmsdb.processes as procs
+
+
+logger = law.logger.get_logger(__name__)
+
+
+def add_datasets(config: od.Config, campaign: od.Campaign):
+
+ # add datasets we need to study
+ dataset_names = {
+ "2018": [
+ # data
+ *[f"data_mumu_{era}" for era in ['a', 'b', 'c', 'd']],
+ *[f"data_egamma_{era}" for era in ['a', 'b', 'c', 'd']],
+ *[f"data_muoneg_{era}" for era in ['a', 'b', 'c', 'd']],
+ *[f"data_mu_{era}" for era in ['a', 'b', 'c', 'd']],
+
+ # backgrounds
+
+ # ewk
+ *[f"dy_lept_m50_ht-{htr}_madgraph" for htr in ['100to200', '200to400', '400to600',
+ '600to800', '800to1200', '1200to2500']],
+
+ # ttbar
+ "tt_dl_powheg",
+ "tt_sl_powheg"
+ ]}[f"{config.x.year}{config.x.corr_postfix}"]
+
+ # loop over all dataset names and add them to the config
+ for dataset_name in dataset_names:
+ config.add_dataset(campaign.get_dataset(dataset_name))
+
+
+def configure_datasets(config: od.Config, limit_dataset_files: int | None = None):
+ """Apply the optional file limit, data-stream tags and trigger settings to each dataset of *config*."""
+ for dataset in config.datasets:
+ if limit_dataset_files:
+ # apply optional limit on the max. number of files per dataset (a falsy limit disables it)
+ for info in dataset.info.values():
+ if info.n_files > limit_dataset_files:
+ info.n_files = limit_dataset_files
+
+ # tag data datasets by name prefix; the tags are matched against config.x.trigger_matrix below
+ if dataset.name.startswith("data_egamma"):
+ dataset.add_tag("EGamma")
+ elif dataset.name.startswith("data_mumu"):
+ dataset.add_tag("DoubleMuon")
+ elif dataset.name.startswith("data_mu_"):
+ dataset.add_tag("SingleMuon")
+ elif dataset.name.startswith("data_muoneg"):
+ dataset.add_tag("MuonEG")
+
+ # data: walk config.x.trigger_matrix in order; the first entry whose tag the dataset
+ # carries sets the required triggers, and the triggers of all earlier entries are
+ # vetoed to avoid double counting events in recorded data
+ if dataset.is_data:
+ prev_triggers = set()
+ for tag, triggers in config.x.trigger_matrix:
+ if dataset.has_tag(tag):
+ dataset.x.require_triggers = triggers
+ dataset.x.veto_triggers = prev_triggers
+ break
+ prev_triggers = prev_triggers | triggers
+
+ elif dataset.is_mc:
+ dataset.x.require_triggers = config.x.all_triggers
+
+ # add more tag info to datasets (matches every dataset whose name starts with "t")
+ if dataset.name.startswith(("t")):
+ dataset.x.has_top = True
+ dataset.add_tag("has_top")
+
+ # example of removing scale, pdf variations for a specific dataset
+ if dataset.name.startswith(("GluGLuToContinToZZ")):
+ dataset.add_tag("skip_scale")
+ dataset.add_tag("skip_pdf")
diff --git a/analysis_templates/ghent_template/__cf_module_name__/config/processes.py b/analysis_templates/ghent_template/__cf_module_name__/config/processes.py
new file mode 100644
index 000000000..a737a7c66
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/config/processes.py
@@ -0,0 +1,31 @@
+# coding: utf-8
+
+"""
+Configuration of the processes used by the Ghent columnflow analysis template.
+"""
+
+import order as od
+from columnflow.config_util import get_root_processes_from_campaign
+
+
+def add_processes(config: od.Config, campaign: od.Campaign):
+ # get all root processes
+ procs = get_root_processes_from_campaign(campaign)
+
+ config.add_process(procs.n.data)
+
+ config.add_process(procs.n.tt)
+
+ config.add_process(procs.n.dy)
+
+ # How to add new processes:
+ # Add custom process to encapsulate all background processes:
+ bg_processes = ['dy']
+ background = config.add_process(
+ name="background",
+ id=9999, # cannot collide with ids defined in cmsdb though
+ label="Background",
+ xsecs = {campaign.ecm: sum([config.get_process(bg).get_xsec(campaign.ecm) for bg in bg_processes])}
+ )
+ for bg in bg_processes:
+ background.add_process(config.get_process(bg))
diff --git a/analysis_templates/ghent_template/__cf_module_name__/config/shifts.py b/analysis_templates/ghent_template/__cf_module_name__/config/shifts.py
new file mode 100644
index 000000000..afa0aa392
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/config/shifts.py
@@ -0,0 +1,189 @@
+# coding: utf-8
+
+"""
+Definition of shifts.
+"""
+
+from collections import OrderedDict
+
+import law
+import os
+import re
+import yaml
+
+from columnflow.util import DotDict, call_once_on_config
+
+import order as od
+
+logger = law.logger.get_logger(__name__)
+
+
+@call_once_on_config()
+def add_shifts(config: od.Config) -> None:
+ """
+ Adds shifts, their column aliases and the event weights they affect to a *config*.
+ """
+
+ def add_shift_aliases(shift_source: str, aliases: dict[str], selection_dependent: bool):
+ # registers *aliases* on the "up" and "down" shift of *shift_source*, which must already exist
+ for direction in ["up", "down"]:
+ shift = config.get_shift(od.Shift.join_name(shift_source, direction))
+ # format keys and values: "{name}" becomes "{_name}" and is filled from the shift's instance attributes
+ inject_shift = lambda s: re.sub(r"\{([^_])", r"{_\1", s).format(**shift.__dict__)
+ _aliases = {inject_shift(key): inject_shift(value) for key, value in aliases.items()}
+ alias_type = "column_aliases_selection_dependent" if selection_dependent else "column_aliases"
+ # extend existing or register new column aliases
+ shift.set_aux(alias_type, shift.get_aux(alias_type, {})).update(_aliases)
+
+ config.add_shift(name="nominal", id=0)
+ config.add_shift(name="minbias_xs_up", id=7, type="shape")
+ config.add_shift(name="minbias_xs_down", id=8, type="shape")
+ add_shift_aliases(
+ "minbias_xs",
+ {
+ "pu_weight": "pu_weight_{name}",
+ "normalized_pu_weight": "normalized_pu_weight_{name}",
+ },
+ selection_dependent=False)
+
+ config.add_shift(name="top_pt_up", id=9, type="shape")
+ config.add_shift(name="top_pt_down", id=10, type="shape")
+ add_shift_aliases("top_pt", {"top_pt_weight": "top_pt_weight_{direction}"}, selection_dependent=False)
+
+ # lepton uncertainties (no column aliases are registered here for the *_trig_sf shifts)
+ config.add_shift(name="e_sf_up", id=40, type="shape")
+ config.add_shift(name="e_sf_down", id=41, type="shape")
+ config.add_shift(name="e_trig_sf_up", id=42, type="shape")
+ config.add_shift(name="e_trig_sf_down", id=43, type="shape")
+ add_shift_aliases("e_sf", {"electron_weight": "electron_weight_{direction}"}, selection_dependent=False)
+
+ config.add_shift(name="mu_sf_up", id=50, type="shape")
+ config.add_shift(name="mu_sf_down", id=51, type="shape")
+ config.add_shift(name="mu_trig_sf_up", id=52, type="shape")
+ config.add_shift(name="mu_trig_sf_down", id=53, type="shape")
+ add_shift_aliases("mu_sf", {"muon_weight": "muon_weight_{direction}"}, selection_dependent=False)
+
+ # b-tagging uncertainties (ids start at 100, two consecutive ids per source)
+ btag_uncs = [
+ "hf", "lf", f"hfstats1_{config.x.year}", f"hfstats2_{config.x.year}",
+ f"lfstats1_{config.x.year}", f"lfstats2_{config.x.year}", "cferr1", "cferr2",
+ ]
+ for i, unc in enumerate(btag_uncs):
+ config.add_shift(name=f"btag_{unc}_up", id=100 + 2 * i, type="shape")
+ config.add_shift(name=f"btag_{unc}_down", id=101 + 2 * i, type="shape")
+ add_shift_aliases(
+ f"btag_{unc}",
+ {
+ "normalized_btag_weight": f"normalized_btag_weight_{unc}_" + "{direction}",
+ "normalized_njet_btag_weight": f"normalized_njet_btag_weight_{unc}_" + "{direction}",
+ },
+ selection_dependent=False,
+ )
+
+ config.add_shift(name="mur_up", id=201, type="shape")
+ config.add_shift(name="mur_down", id=202, type="shape")
+ config.add_shift(name="muf_up", id=203, type="shape")
+ config.add_shift(name="muf_down", id=204, type="shape")
+ config.add_shift(name="murf_envelope_up", id=205, type="shape")
+ config.add_shift(name="murf_envelope_down", id=206, type="shape")
+ config.add_shift(name="pdf_up", id=207, type="shape")
+ config.add_shift(name="pdf_down", id=208, type="shape")
+
+ for unc in ["mur", "muf", "murf_envelope", "pdf"]:
+ # add_shift_aliases(unc, {f"{unc}_weight": f"{unc}_weight_" + "{direction}"}, selection_dependent=False)
+ add_shift_aliases(
+ unc,
+ {f"normalized_{unc}_weight": f"normalized_{unc}_weight_" + "{direction}"},
+ selection_dependent=False,
+ )
+
+ all_jec_sources = [
+ "AbsoluteStat",
+ "AbsoluteScale",
+ "AbsoluteSample",
+ "AbsoluteFlavMap",
+ "AbsoluteMPFBias",
+ "Fragmentation",
+ "SinglePionECAL",
+ "SinglePionHCAL",
+ "FlavorQCD",
+ "TimePtEta",
+ "RelativeJEREC1",
+ "RelativeJEREC2",
+ "RelativeJERHF",
+ "RelativePtBB",
+ "RelativePtEC1",
+ "RelativePtEC2",
+ "RelativePtHF",
+ "RelativeBal",
+ "RelativeSample",
+ "RelativeFSR",
+ "RelativeStatFSR",
+ "RelativeStatEC",
+ "RelativeStatHF",
+ "PileUpDataMC",
+ "PileUpPtRef",
+ "PileUpPtBB",
+ "PileUpPtEC1",
+ "PileUpPtEC2",
+ "PileUpPtHF",
+ "PileUpMuZero",
+ "PileUpEnvelope",
+ "SubTotalPileUp",
+ "SubTotalRelative",
+ "SubTotalPt",
+ "SubTotalScale",
+ "SubTotalAbsolute",
+ "SubTotalMC",
+ "Total",
+ "TotalNoFlavor",
+ "TotalNoTime",
+ "TotalNoFlavorNoTime",
+ "FlavorZJet",
+ "FlavorPhotonJet",
+ "FlavorPureGluon",
+ "FlavorPureQuark",
+ "FlavorPureCharm",
+ "FlavorPureBottom",
+ "TimeRunA",
+ "TimeRunB",
+ "TimeRunC",
+ "TimeRunD",
+ "CorrelationGroupMPFInSitu",
+ "CorrelationGroupIntercalibration",
+ "CorrelationGroupbJES",
+ "CorrelationGroupFlavor",
+ "CorrelationGroupUncorrelated",
+ ]
+ # the shift ids of a jec source are derived from its position in all_jec_sources
+ for jec_source in config.x.jec["uncertainty_sources"]:
+ idx = all_jec_sources.index(jec_source)
+ config.add_shift(name=f"jec_{jec_source}_up", id=5000 + 2 * idx, type="shape")
+ config.add_shift(name=f"jec_{jec_source}_down", id=5001 + 2 * idx, type="shape")
+ add_shift_aliases(
+ f"jec_{jec_source}",
+ {"Jet.pt": "Jet.pt_{name}", "Jet.mass": "Jet.mass_{name}"},
+ selection_dependent=True,
+ )
+
+ config.add_shift(name="jer_up", id=6000, type="shape", tags={"selection_dependent"})
+ config.add_shift(name="jer_down", id=6001, type="shape", tags={"selection_dependent"})
+ add_shift_aliases("jer", {"Jet.pt": "Jet.pt_{name}", "Jet.mass": "Jet.mass_{name}"}, selection_dependent=True)
+ # helper returning the up and down shift instances of each given shift source as a flat list
+ get_shifts = lambda *keys: sum(([config.get_shift(f"{k}_up"), config.get_shift(f"{k}_down")] for k in keys), [])
+
+ config.x.event_weights["normalized_btag_weight"] = get_shifts(*(f"btag_{unc}" for unc in btag_uncs))
+ config.x.event_weights["normalized_pu_weight"] = get_shifts("minbias_xs")
+ config.x.event_weights["electron_weight"] = get_shifts("e_sf")
+ config.x.event_weights["muon_weight"] = get_shifts("mu_sf")
+
+ for dataset in config.datasets:
+ dataset.x.event_weights = DotDict()
+ if not dataset.has_tag("skip_scale"):
+ # scale weights, skipped for datasets tagged "skip_scale"
+ dataset.x.event_weights["normalized_murf_envelope_weight"] = get_shifts("murf_envelope")
+ dataset.x.event_weights["normalized_mur_weight"] = get_shifts("mur")
+ dataset.x.event_weights["normalized_muf_weight"] = get_shifts("muf")
+
+ if not dataset.has_tag("skip_pdf"):
+ dataset.x.event_weights["normalized_pdf_weight"] = get_shifts("pdf")
diff --git a/analysis_templates/ghent_template/__cf_module_name__/config/styling.py b/analysis_templates/ghent_template/__cf_module_name__/config/styling.py
new file mode 100644
index 000000000..f656d4e3f
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/config/styling.py
@@ -0,0 +1,39 @@
+"""
+Collection of helpers for styling, e.g.
+- dictionaries of defaults for variable definition, colors, labels, etc.
+- functions to quickly create variable insts in a predefined way
+"""
+
+import order as od
+
+from columnflow.columnar_util import EMPTY_FLOAT
+
+#
+# Default colors per process name, looked up by stylize_processes
+#
+
+default_process_colors = {
+ "data": "#000000", # black
+ "tt": "#cf9fff", # light purple
+ "dy_lep": "#377eb8", # blue; NOTE(review): config/processes.py adds the process "dy" - confirm this key
+}
+
+
+def stylize_processes(config: od.Config) -> None:
+ """
+ Small helper that sets the process insts of *config* to analysis-appropriate defaults.
+ For now: only colors (color1 from default_process_colors, color2 black) and the legend config.
+ Could also include some more defaults (labels, unstack, ...)
+ """
+
+ for proc in config.processes:
+ # set default colors; processes without an entry in default_process_colors keep their color1
+ if color := default_process_colors.get(proc.name, None):
+ proc.color1 = color
+ proc.color2 = "#000000"
+
+ config.x.default_legend_cfg = {
+ "ncol": 2,
+ "loc": "upper right",
+ "fontsize": 15,
+ }
diff --git a/analysis_templates/ghent_template/__cf_module_name__/config/variables.py b/analysis_templates/ghent_template/__cf_module_name__/config/variables.py
new file mode 100644
index 000000000..2917fb459
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/config/variables.py
@@ -0,0 +1,43 @@
+import order as od
+
+from columnflow.util import maybe_import, call_once_on_config
+
+np = maybe_import("numpy")
+ak = maybe_import("awkward")
+
+from columnflow.columnar_util import EMPTY_FLOAT
+
+
+@call_once_on_config()
+def add_variables(config: od.Config) -> None:
+ """
+ Adds variables to a *config* that are produced as part of the `features` producer.
+ """
+ config.add_variable(
+ name="event",
+ expression="event",
+ binning=(1, 0.0, 1.0e9),
+ x_title="Event number",
+ discrete_x=True,
+ )
+ config.add_variable(
+ name="run",
+ expression="run",
+ binning=(1, 100000.0, 500000.0),
+ x_title="Run number",
+ discrete_x=True,
+ )
+ config.add_variable(
+ name="lumi",
+ expression="luminosityBlock",
+ binning=(1, 0.0, 5000.0),
+ x_title="Luminosity block",
+ discrete_x=True,
+ )
+ config.add_variable(
+ name="n_jet",
+ expression="n_jet",
+ binning=(6, 0.5, 6.5),
+ x_title="Number of jets",
+ discrete_x=True,
+ )
diff --git a/analysis_templates/ghent_template/__cf_module_name__/config/veto.py b/analysis_templates/ghent_template/__cf_module_name__/config/veto.py
new file mode 100644
index 000000000..8313a6eb6
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/config/veto.py
@@ -0,0 +1,16 @@
+import order as od
+
+from columnflow.util import call_once_on_config
+
+@call_once_on_config()
+def add_vetoes(config: od.Config) -> None:
+ config.x.veto = {
+ 'dy_lep_m10to50_amcatnlo': [
+ {
+ "event": 33098036,
+ "luminosityBlock": 20170,
+ "run": 1,
+ "file": "/store/mc/RunIISummer20UL18NanoAODv9/DYJetsToLL_M-10to50_TuneCP5_13TeV-amcatnloFXFX-pythia8/NANOAODSIM/106X_upgrade2018_realistic_v16_L1v1-v1/50000/296CA60E-0122-2F4F-8B04-17DCF5E3E062.root", # noqa
+ }
+ ]
+ }
diff --git a/analysis_templates/ghent_template/__cf_module_name__/inference/__init__.py b/analysis_templates/ghent_template/__cf_module_name__/inference/__init__.py
new file mode 100644
index 000000000..57d631c3f
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/inference/__init__.py
@@ -0,0 +1 @@
+# coding: utf-8
diff --git a/analysis_templates/ghent_template/__cf_module_name__/inference/example.py b/analysis_templates/ghent_template/__cf_module_name__/inference/example.py
new file mode 100644
index 000000000..0eec2bb30
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/inference/example.py
@@ -0,0 +1,112 @@
+# coding: utf-8
+
+"""
+Example inference model.
+"""
+
+from columnflow.inference import inference_model, ParameterType, ParameterTransformation
+
+
+@inference_model
+def example(self):
+ """Example inference model declaring two categories, two processes and a set of rate and shape parameters."""
+ #
+ # categories
+ # NOTE(review): "2j", "jet1_pt", "jet1_eta" are not defined in this template's config/categories.py or variables.py
+
+ self.add_category(
+ "cat1",
+ config_category="incl",
+ config_variable="jet1_pt",
+ config_data_datasets=["data_mu_b"],
+ mc_stats=True,
+ )
+ self.add_category(
+ "cat2",
+ config_category="2j",
+ config_variable="jet1_eta",
+ # fake data from TT
+ data_from_processes=["TT"],
+ mc_stats=True,
+ )
+
+ #
+ # processes
+ # NOTE(review): "st" / "st_tchannel_t_powheg" are not added in this template's processes.py / datasets.py
+
+ self.add_process(
+ "ST",
+ is_signal=True,
+ config_process="st",
+ config_mc_datasets=["st_tchannel_t_powheg"],
+ )
+ self.add_process(
+ "TT",
+ config_process="tt",
+ config_mc_datasets=["tt_sl_powheg"],
+ )
+
+ #
+ # parameters
+ # NOTE(review): config/shifts.py has no "tune", "mu" or "jec" source (only e.g. "mu_sf", "jec_<source>") - confirm
+
+ # groups
+ self.add_parameter_group("experiment")
+ self.add_parameter_group("theory")
+
+ # lumi: one gaussian rate parameter per uncertainty of the config's luminosity, symmetrized
+ lumi = self.config_inst.x.luminosity
+ for unc_name in lumi.uncertainties:
+ self.add_parameter(
+ unc_name,
+ type=ParameterType.rate_gauss,
+ effect=lumi.get(names=unc_name, direction=("down", "up"), factor=True),
+ transformations=[ParameterTransformation.symmetrize],
+ )
+
+ # tune uncertainty
+ self.add_parameter(
+ "tune",
+ process="TT",
+ type=ParameterType.shape,
+ config_shift_source="tune",
+ )
+
+ # muon weight uncertainty
+ self.add_parameter(
+ "mu",
+ process=["ST", "TT"],
+ type=ParameterType.shape,
+ config_shift_source="mu",
+ )
+
+ # jet energy correction uncertainty
+ self.add_parameter(
+ "jec",
+ process=["ST", "TT"],
+ type=ParameterType.shape,
+ config_shift_source="jec",
+ )
+
+ # a custom asymmetric uncertainty that is converted from rate to shape
+ self.add_parameter(
+ "QCDscale_ttbar",
+ process="TT",
+ type=ParameterType.shape,
+ transformations=[ParameterTransformation.effect_from_rate],
+ effect=(0.5, 1.1),
+ )
+
+
+@inference_model
+def example_no_shapes(self):
+ # same initialization as "example" above
+ example.init_func.__get__(self, self.__class__)()
+
+ #
+ # remove all shape parameters
+ #
+
+ for category_name, process_name, parameter in self.iter_parameters():
+ if parameter.type.is_shape or any(trafo.from_shape for trafo in parameter.transformations):
+ self.remove_parameter(parameter.name, process=process_name, category=category_name)
diff --git a/analysis_templates/ghent_template/__cf_module_name__/ml/__init__.py b/analysis_templates/ghent_template/__cf_module_name__/ml/__init__.py
new file mode 100644
index 000000000..57d631c3f
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/ml/__init__.py
@@ -0,0 +1 @@
+# coding: utf-8
diff --git a/analysis_templates/ghent_template/__cf_module_name__/ml/example.py b/analysis_templates/ghent_template/__cf_module_name__/ml/example.py
new file mode 100644
index 000000000..48e7e936c
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/ml/example.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+
+"""
+Test model definition.
+"""
+
+from __future__ import annotations
+
+import law
+import order as od
+
+from columnflow.types import Any
+from columnflow.ml import MLModel
+from columnflow.util import maybe_import, dev_sandbox
+from columnflow.columnar_util import Route, set_ak_column
+
+ak = maybe_import("awkward")
+tf = maybe_import("tensorflow")
+
+law.contrib.load("tensorflow")
+
+
+class ExampleModel(MLModel):
+
+ # mark the model as accepting only a single config
+ single_config = True
+
+ def setup(self):
+ # dynamically add variables for the quantities produced by this model
+ if f"{self.cls_name}.output" not in self.config_inst.variables:
+ self.config_inst.add_variable(
+ name=f"{self.cls_name}.output",
+ null_value=-1,
+ binning=(20, -1.0, 1.0),
+ x_title=f"{self.cls_name} DNN output",
+ )
+
+ def sandbox(self, task: law.Task) -> str:
+ return dev_sandbox("bash::$__cf_short_name_uc___BASE/sandboxes/example.sh")
+
+ def datasets(self, config_inst: od.Config) -> set[od.Dataset]:
+ return {
+ config_inst.get_dataset("st_tchannel_t_powheg"),
+ config_inst.get_dataset("tt_sl_powheg"),
+ }
+
+ def uses(self, config_inst: od.Config) -> set[Route | str]:
+ return {
+ "Jet.pt", "Muon.pt",
+ }
+
+ def produces(self, config_inst: od.Config) -> set[Route | str]:
+ return {
+ f"{self.cls_name}.ouptut",
+ }
+
+ def output(self, task: law.Task) -> law.FileSystemDirectoryTarget:
+ return task.target(f"mlmodel_f{task.branch}of{self.folds}", dir=True)
+
+ def open_model(self, target: law.FileSystemDirectoryTarget) -> tf.keras.models.Model:
+ return target.load(formatter="tf_keras_model")
+
+ def train(
+ self,
+ task: law.Task,
+ input: dict[str, list[dict[str, law.FileSystemFileTarget]]],
+ output: law.FileSystemDirectoryTarget,
+ ) -> None:
+ # define a dummy NN
+ x = tf.keras.Input(shape=(2,))
+ a1 = tf.keras.layers.Dense(10, activation="elu")(x)
+ y = tf.keras.layers.Dense(2, activation="softmax")(a1)
+ model = tf.keras.Model(inputs=x, outputs=y)
+
+ # the output is just a single directory target
+ output.dump(model, formatter="tf_keras_model")
+
+ def evaluate(
+ self,
+ task: law.Task,
+ events: ak.Array,
+ models: list[Any],
+ fold_indices: ak.Array,
+ events_used_in_training: bool = False,
+ ) -> ak.Array:
+ # fake evaluation
+ events = set_ak_column(events, f"{self.cls_name}.output", 0.5)
+
+ return events
+
+
+# usable derivations
+example = ExampleModel.derive("example", cls_dict={"folds": 2})
diff --git a/analysis_templates/ghent_template/__cf_module_name__/plotting/__init__ b/analysis_templates/ghent_template/__cf_module_name__/plotting/__init__
new file mode 100644
index 000000000..57d631c3f
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/plotting/__init__
@@ -0,0 +1 @@
+# coding: utf-8
diff --git a/analysis_templates/ghent_template/__cf_module_name__/plotting/example.py b/analysis_templates/ghent_template/__cf_module_name__/plotting/example.py
new file mode 100644
index 000000000..943d3ce33
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/plotting/example.py
@@ -0,0 +1,83 @@
+# coding: utf-8
+
+"""
+Examples for custom plot functions.
+"""
+
+from __future__ import annotations
+
+from collections import OrderedDict
+
+from columnflow.util import maybe_import
+from columnflow.plotting.plot_util import (
+ remove_residual_axis,
+ apply_variable_settings,
+ apply_process_settings,
+)
+
+hist = maybe_import("hist")
+np = maybe_import("numpy")
+mpl = maybe_import("matplotlib")
+plt = maybe_import("matplotlib.pyplot")
+mplhep = maybe_import("mplhep")
+od = maybe_import("order")
+
+
+def my_plot1d_func(
+    hists: OrderedDict[od.Process, hist.Hist],
+    config_inst: od.Config,
+    category_inst: od.Category,
+    variable_insts: list[od.Variable],
+    style_config: dict | None = None,
+    yscale: str | None = "",
+    process_settings: dict | None = None,
+    variable_settings: dict | None = None,
+    example_param: str | float | bool | None = None,
+    **kwargs,
+) -> tuple[plt.Figure, tuple[plt.Axes]]:
+    r"""
+    This is an exemplary custom plotting function that draws one 1D histogram per process.
+
+    Exemplary task call:
+
+    .. code-block:: bash
+
+        law run cf.PlotVariables1D --version v1 --processes st,tt --variables jet1_pt \
+            --plot-function __cf_module_name__.plotting.example.my_plot1d_func \
+            --general-settings example_param=some_text
+
+    :param hists: Histograms to draw, mapped to their process instances.
+    :param variable_insts: Variable instances; the first one defines axis labels and scales.
+    :param yscale: Scale of the y-axis. When empty or *None*, ``"log"`` is used if the first
+        variable has ``log_y`` set, and ``"linear"`` otherwise.
+    :param process_settings: Forwarded to ``apply_process_settings``.
+    :param variable_settings: Forwarded to ``apply_variable_settings``.
+    :param example_param: Arbitrary example parameter that is only printed.
+    :return: The figure and a tuple containing its single axes object.
+    """
+    # we can add arbitrary parameters via the `general_settings` parameter to access them in the
+    # plotting function. They are automatically parsed either to a bool, float, or string
+    print(f"The example_param has been set to '{example_param}' (type: {type(example_param)})")
+
+    # call helper function to remove shift axis from histogram
+    remove_residual_axis(hists, "shift")
+
+    # call helper functions to apply the variable_settings and process_settings
+    variable_inst = variable_insts[0]
+    hists = apply_variable_settings(hists, variable_insts, variable_settings)
+    hists = apply_process_settings(hists, process_settings)
+
+    # an empty or missing yscale is not a valid matplotlib scale name, so fall back to the
+    # preference stored on the variable
+    if not yscale:
+        yscale = "log" if variable_inst.log_y else "linear"
+
+    # use the mplhep CMS style
+    plt.style.use(mplhep.style.CMS)
+
+    # create a figure and fill it with content
+    fig, ax = plt.subplots()
+    for proc_inst, h in hists.items():
+        h.plot1d(
+            ax=ax,
+            label=proc_inst.label,
+            color=proc_inst.color1,
+        )
+
+    # styling and parameter implementation (e.g. `yscale`)
+    ax.set(
+        yscale=yscale,
+        ylabel=variable_inst.get_full_y_title(),
+        xlabel=variable_inst.get_full_x_title(),
+        xscale="log" if variable_inst.log_x else "linear",
+    )
+    ax.legend()
+    mplhep.cms.label(ax=ax, fontsize=22, llabel="private work")
+
+    # task expects a figure and a tuple of axes as output
+    return fig, (ax,)
diff --git a/analysis_templates/ghent_template/__cf_module_name__/production/__init__.py b/analysis_templates/ghent_template/__cf_module_name__/production/__init__.py
new file mode 100644
index 000000000..57d631c3f
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/production/__init__.py
@@ -0,0 +1 @@
+# coding: utf-8
diff --git a/analysis_templates/ghent_template/__cf_module_name__/production/cutflow_features.py b/analysis_templates/ghent_template/__cf_module_name__/production/cutflow_features.py
new file mode 100644
index 000000000..977eb1959
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/production/cutflow_features.py
@@ -0,0 +1,53 @@
+# coding: utf-8
+
+"""
+Column production methods for cutflow features.
+"""
+
+from columnflow.selection import SelectionResult
+from columnflow.production import Producer, producer
+from columnflow.production.categories import category_ids
+from columnflow.production.cms.mc_weight import mc_weight
+from columnflow.selection.util import create_collections_from_masks
+from columnflow.util import maybe_import
+from columnflow.columnar_util import EMPTY_FLOAT, Route, set_ak_column
+
+np = maybe_import("numpy")
+ak = maybe_import("awkward")
+
+
+@producer(
+    uses={
+        # nano columns
+        "Jet.pt",
+        # other producers
+        mc_weight, category_ids,
+    },
+    produces={
+        # new columns
+        "cutflow.jet1_pt",
+        # columns of other producers
+        mc_weight, category_ids,
+    },
+)
+def cutflow_features(
+    self: Producer,
+    events: ak.Array,
+    results: SelectionResult,
+    **kwargs,
+) -> ak.Array:
+    """
+    Produce the columns needed for cutflow plots: the mc weight (MC datasets only), the category
+    ids and ``cutflow.jet1_pt``, built from the route ``Jet.pt[:,0]``.
+    """
+    # the mc weight producer is only invoked for MC datasets
+    if self.dataset_inst.is_mc:
+        events = self[mc_weight](events, **kwargs)
+
+    # build collections from the object masks of the selection result, evaluate the categories
+    # on them, and attach the resulting ids to the full events
+    masked_events = create_collections_from_masks(events, results.objects)
+    events = self[category_ids](masked_events, target_events=events, **kwargs)
+
+    # cutflow column, with EMPTY_FLOAT passed as second argument to Route.apply
+    leading_jet_pt = Route("Jet.pt[:,0]").apply(events, EMPTY_FLOAT)
+    return set_ak_column(events, "cutflow.jet1_pt", leading_jet_pt)
diff --git a/analysis_templates/ghent_template/__cf_module_name__/production/default.py b/analysis_templates/ghent_template/__cf_module_name__/production/default.py
new file mode 100644
index 000000000..2bc269bb8
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/production/default.py
@@ -0,0 +1,60 @@
+
+"""
+Column production methods related to higher-level features.
+"""
+
+import functools
+
+from columnflow.production import Producer, producer
+from columnflow.util import maybe_import, four_vec
+from columnflow.columnar_util import set_ak_column, EMPTY_FLOAT
+
+from columnflow.production.categories import category_ids
+
+from __cf_short_name_lc__.production.weights import event_weights
+from __cf_short_name_lc__.config.categories import add_categories_production
+
+np = maybe_import("numpy")
+ak = maybe_import("awkward")
+coffea = maybe_import("coffea")
+maybe_import("coffea.nanoevents.methods.nanoaod")
+
+
+@producer(
+    uses=(
+        {
+            category_ids,
+            event_weights,
+        } | four_vec(
+            {"Electron", "Muon"},
+        ) | four_vec(
+            {"Jet"},
+            # btagDeepFlavB is read below to count b-tagged jets, so it is declared explicitly
+            # instead of relying on another (MC-only) producer to request it
+            {"hadronFlavour", "btagDeepFlavB"},
+        )
+    ),
+    produces={
+        category_ids, event_weights,
+        "ht", "n_jet", "n_electron", "n_muon", "n_bjet",
+    },
+)
+def default(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
+    """
+    Default producer: runs the event weights (MC datasets only) and the category ids, then adds
+    the event-level columns ``ht``, ``n_jet``, ``n_bjet``, ``n_electron`` and ``n_muon``.
+
+    ``n_bjet`` counts jets whose ``btagDeepFlavB`` value is at least the medium deepjet working
+    point stored in the config (``config_inst.x.btag_working_points.deepjet.medium``).
+    """
+    # add event weights
+    if self.dataset_inst.is_mc:
+        events = self[event_weights](events, **kwargs)
+
+    # (re)produce category ids
+    events = self[category_ids](events, **kwargs)
+
+    # scalar sum of jet pt and object multiplicities (objects are counted via pt > 0)
+    events = set_ak_column(events, "ht", ak.sum(events.Jet.pt, axis=1), value_type=np.float32)
+    events = set_ak_column(events, "n_jet", ak.sum(events.Jet.pt > 0, axis=1))
+
+    btag_wp = self.config_inst.x.btag_working_points.deepjet.medium
+    events = set_ak_column(events, "n_bjet", ak.sum(events.Jet.btagDeepFlavB >= btag_wp, axis=1))
+
+    events = set_ak_column(events, "n_electron", ak.sum(events.Electron.pt > 0, axis=1))
+    events = set_ak_column(events, "n_muon", ak.sum(events.Muon.pt > 0, axis=1))
+
+    return events
+
+
+@default.init
+def default_init(self: Producer) -> None:
+    """
+    Init hook of the default producer that registers the production categories in the config.
+    """
+    config_inst = self.config_inst
+    add_categories_production(config_inst)
diff --git a/analysis_templates/ghent_template/__cf_module_name__/production/example.py b/analysis_templates/ghent_template/__cf_module_name__/production/example.py
new file mode 100644
index 000000000..b190ac04f
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/production/example.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+
+"""
+Column production methods related to higher-level features.
+"""
+
+
+from columnflow.production import Producer, producer
+from columnflow.production.categories import category_ids
+from columnflow.production.normalization import normalization_weights
+from columnflow.production.cms.seeds import deterministic_seeds
+from columnflow.production.cms.mc_weight import mc_weight
+from columnflow.production.cms.muon import muon_weights
+from columnflow.selection.util import create_collections_from_masks
+from columnflow.util import maybe_import
+from columnflow.columnar_util import EMPTY_FLOAT, Route, set_ak_column
+
+np = maybe_import("numpy")
+ak = maybe_import("awkward")
+
+
+@producer(
+    # nano columns that are read
+    uses={"Jet.pt"},
+    # new columns that are written
+    produces={"ht", "n_jet"},
+)
+def features(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
+    """
+    Add ``ht`` (sum of ``Jet.pt`` per event) and ``n_jet`` (number of ``Jet.pt`` entries per
+    event, stored with value type int32).
+    """
+    ht = ak.sum(events.Jet.pt, axis=1)
+    events = set_ak_column(events, "ht", ht)
+
+    n_jet = ak.num(events.Jet.pt, axis=1)
+    return set_ak_column(events, "n_jet", n_jet, value_type=np.int32)
+
+
+@producer(
+    uses={
+        # nano columns
+        "Jet.pt",
+        # other producers
+        mc_weight, category_ids,
+    },
+    produces={
+        # new columns
+        "cutflow.jet1_pt",
+        # columns of other producers
+        mc_weight, category_ids,
+    },
+)
+def cutflow_features(
+    self: Producer,
+    events: ak.Array,
+    object_masks: dict[str, dict[str, ak.Array]],
+    **kwargs,
+) -> ak.Array:
+    """
+    Produce the columns needed for cutflow plots: the mc weight (MC datasets only), the category
+    ids and ``cutflow.jet1_pt``, built from the route ``Jet.pt[:,0]``. Collections used for the
+    categorization are created from *object_masks*.
+    """
+    # the mc weight producer is only invoked for MC datasets
+    if self.dataset_inst.is_mc:
+        events = self[mc_weight](events, **kwargs)
+
+    # build collections from the object masks, evaluate the categories on them, and attach the
+    # resulting ids to the full events
+    masked_events = create_collections_from_masks(events, object_masks)
+    events = self[category_ids](masked_events, target_events=events, **kwargs)
+
+    # cutflow column, with EMPTY_FLOAT passed as second argument to Route.apply
+    leading_jet_pt = Route("Jet.pt[:,0]").apply(events, EMPTY_FLOAT)
+    return set_ak_column(events, "cutflow.jet1_pt", leading_jet_pt)
+
+
+@producer(
+    uses={
+        features, category_ids, normalization_weights, muon_weights, deterministic_seeds,
+    },
+    produces={
+        features, category_ids, normalization_weights, muon_weights, deterministic_seeds,
+    },
+)
+def example(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
+    """
+    Example producer chaining features, category ids and deterministic seeds for all datasets,
+    followed by normalization and muon weights for MC datasets.
+    """
+    # producers that run for every dataset, in this order
+    for prod in (features, category_ids, deterministic_seeds):
+        events = self[prod](events, **kwargs)
+
+    # mc-only weights
+    if self.dataset_inst.is_mc:
+        for weight_prod in (normalization_weights, muon_weights):
+            events = self[weight_prod](events, **kwargs)
+
+    return events
diff --git a/analysis_templates/ghent_template/__cf_module_name__/production/normalized_btag.py b/analysis_templates/ghent_template/__cf_module_name__/production/normalized_btag.py
new file mode 100644
index 000000000..b25f74b7b
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/production/normalized_btag.py
@@ -0,0 +1,130 @@
+# coding: utf-8
+
+"""
+Producers for phase-space normalized btag scale factor weights.
+"""
+
+from __future__ import annotations
+
+from columnflow.production import Producer, producer
+from columnflow.production.cms.btag import btag_weights
+from columnflow.util import maybe_import, safe_div, InsertableDict
+from columnflow.columnar_util import set_ak_column
+
+np = maybe_import("numpy")
+ak = maybe_import("awkward")
+
+
+@producer(
+ uses={
+ btag_weights.PRODUCES, "process_id", "Jet.pt",
+ },
+ # produced columns are defined in the init function below
+ mc_only=True,
+)
+def normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
+ """
+ For every column of btag_weights whose name starts with "btag_weight", store two rescaled
+ copies of that weight:
+
+ - ``normalized_<weight>``: weight times the ratio stored in ``self.ratio_per_pid`` for the
+ process id of the event,
+ - ``normalized_njet_<weight>``: weight times the entry of the ``self.ratio_per_pid_njet`` lookup
+ table for the process id and the number of ``Jet.pt`` entries of the event.
+
+ Both ratio containers and ``self.unique_process_ids`` are created in the setup function.
+ """
+
+ for weight_name in self[btag_weights].produces:
+ if not weight_name.startswith("btag_weight"):
+ continue
+
+ # create weight vectors starting with ones for both weight variations, i.e.,
+ # normalization per pid and normalization per pid and jet multiplicity
+ norm_weight_per_pid = np.ones(len(events), dtype=np.float32)
+ norm_weight_per_pid_njet = np.ones(len(events), dtype=np.float32)
+
+ # fill weights with a new mask per unique process id (mostly just one)
+ for pid in self.unique_process_ids:
+ pid_mask = events.process_id == pid
+ # single value
+ norm_weight_per_pid[pid_mask] = self.ratio_per_pid[weight_name][pid]
+ # lookup table, indexed by the jet multiplicity of the events of this process
+ n_jets = ak.num(events[pid_mask].Jet.pt, axis=1)
+ norm_weight_per_pid_njet[pid_mask] = self.ratio_per_pid_njet[weight_name][pid][n_jets]
+
+ # multiply with actual weight
+ norm_weight_per_pid = norm_weight_per_pid * events[weight_name]
+ norm_weight_per_pid_njet = norm_weight_per_pid_njet * events[weight_name]
+
+ # store them
+ events = set_ak_column(events, f"normalized_{weight_name}", norm_weight_per_pid)
+ events = set_ak_column(events, f"normalized_njet_{weight_name}", norm_weight_per_pid_njet)
+
+ return events
+
+
+@normalized_btag_weights.init
+def normalized_btag_weights_init(self: Producer) -> None:
+    """
+    Declare the produced columns: for each column of btag_weights starting with "btag_weight",
+    the columns ``normalized_<weight>`` and ``normalized_njet_<weight>`` are added.
+    """
+    btag_columns = [
+        name
+        for name in self[btag_weights].produces
+        if name.startswith("btag_weight")
+    ]
+    for name in btag_columns:
+        self.produces.add(f"normalized_{name}")
+        self.produces.add(f"normalized_njet_{name}")
+
+
+@normalized_btag_weights.requires
+def normalized_btag_weights_requires(self: Producer, reqs: dict) -> None:
+    """
+    Add the MergeSelectionStats task to *reqs* under the key ``selection_stats``.
+    """
+    from columnflow.tasks.selection import MergeSelectionStats
+
+    req_kwargs = {
+        "tree_index": 0,
+        "branch": -1,
+        "_exclude": MergeSelectionStats.exclude_params_forest_merge,
+    }
+    reqs["selection_stats"] = MergeSelectionStats.req(self.task, **req_kwargs)
+
+
+@normalized_btag_weights.setup
+def normalized_btag_weights_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None:
+ """
+ Load the selection stats (JSON) from *inputs* and derive the normalization ratios used by the
+ producer. Sets ``self.unique_process_ids``, ``self.ratio_per_pid`` (weight name -> pid -> ratio)
+ and ``self.ratio_per_pid_njet`` (weight name -> pid -> array indexed by jet multiplicity).
+
+ Each ratio is ``safe_div(numerator, denominator)`` where the numerator is the stats sum without
+ the btag weight and the denominator is the corresponding sum including the weight.
+ """
+ # load the selection stats
+ stats = inputs["selection_stats"]["collection"][0]["stats"].load(formatter="json")
+
+ # get the unique process ids in that dataset
+ key = "sum_mc_weight_selected_no_bjet_per_process_and_njet"
+ self.unique_process_ids = list(map(int, stats[key].keys()))
+
+ # get the maximum numbers of jets
+ max_n_jets = max(map(int, sum((list(d.keys()) for d in stats[key].values()), [])))
+
+ # helper to get numerators and denominators
+ # (each helper defines its own local `key`; entries missing in the stats count as 0.0)
+ def numerator_per_pid(pid):
+ key = "sum_mc_weight_selected_no_bjet_per_process"
+ return stats[key].get(str(pid), 0.0)
+
+ def denominator_per_pid(weight_name, pid):
+ key = f"sum_mc_weight_{weight_name}_selected_no_bjet_per_process"
+ return stats[key].get(str(pid), 0.0)
+
+ def numerator_per_pid_njet(pid, n_jets):
+ key = "sum_mc_weight_selected_no_bjet_per_process_and_njet"
+ d = stats[key].get(str(pid), {})
+ return d.get(str(n_jets), 0.0)
+
+ def denominator_per_pid_njet(weight_name, pid, n_jets):
+ key = f"sum_mc_weight_{weight_name}_selected_no_bjet_per_process_and_njet"
+ d = stats[key].get(str(pid), {})
+ return d.get(str(n_jets), 0.0)
+
+ # extract the ratio per weight and pid
+ self.ratio_per_pid = {
+ weight_name: {
+ pid: safe_div(numerator_per_pid(pid), denominator_per_pid(weight_name, pid))
+ for pid in self.unique_process_ids
+ }
+ for weight_name in self[btag_weights].produces
+ if weight_name.startswith("btag_weight")
+ }
+
+ # extract the ratio per weight, pid and also the jet multiplicity, using the latter as an index
+ # for a lookup table (since it naturally starts at 0)
+ self.ratio_per_pid_njet = {
+ weight_name: {
+ pid: np.array([
+ safe_div(numerator_per_pid_njet(pid, n_jets), denominator_per_pid_njet(weight_name, pid, n_jets))
+ for n_jets in range(max_n_jets + 1)
+ ])
+ for pid in self.unique_process_ids
+ }
+ for weight_name in self[btag_weights].produces
+ if weight_name.startswith("btag_weight")
+ }
\ No newline at end of file
diff --git a/analysis_templates/ghent_template/__cf_module_name__/production/normalized_weights.py b/analysis_templates/ghent_template/__cf_module_name__/production/normalized_weights.py
new file mode 100644
index 000000000..d3129b348
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/production/normalized_weights.py
@@ -0,0 +1,126 @@
+# coding: utf-8
+
+"""
+Column production methods related to generic event weights.
+"""
+
+from typing import Iterable, Callable
+
+import law
+
+from columnflow.production import Producer, producer
+from columnflow.util import maybe_import, safe_div, InsertableDict
+from columnflow.columnar_util import set_ak_column
+
+ak = maybe_import("awkward")
+np = maybe_import("numpy")
+
+
+logger = law.logger.get_logger(__name__)
+
+
+def normalized_weight_factory(
+ producer_name: str,
+ weight_producers: Iterable[Producer],
+ **kwargs,
+) -> Callable:
+ """
+ Create and return a producer named *producer_name* that stores, for each weight column of the
+ given *weight_producers*, a rescaled copy named ``normalized_<weight>``. The scale factor per
+ process id is computed in the setup hook from the merged selection stats.
+
+ Additional *kwargs* are accepted but not used.
+ """
+
+ @producer(
+ uses=set(weight_producers) | set().union(*[w.produces for w in weight_producers]) | {"process_id"},
+ cls_name=producer_name,
+ mc_only=True,
+ # skip the checking existence of used/produced columns because not all columns are there
+ check_used_columns=False,
+ check_produced_columns=False,
+ # remaining produced columns are defined in the init function below
+ )
+ def normalized_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
+ """
+ Add ``normalized_<weight>`` for every name in ``self.weight_names`` that is a field of
+ *events*. Producers of missing weights are rerun first when the condition below holds.
+ """
+
+ # check existence of requested weights to normalize and run producer if missing
+ missing_weights = self.weight_names.difference(events.fields)
+
+ if missing_weights:
+ # try to produce missing weights
+ for prod in self.weight_producers:
+ # rerun only if some produced column is absent while some used column is present
+ if (
+ self[prod].produced_columns.difference(events.fields) and
+ self[prod].used_columns.intersection(events.fields)
+ ):
+ logger.info(f"Rerun producer {self[prod].cls_name}")
+ events = self[prod](events, **kwargs)
+
+ # report weights that are still missing after the rerun attempt
+ if not_reproduced := missing_weights.difference(events.fields):
+ logger.info(f"Weight columns {not_reproduced} could not be reproduced")
+
+
+ for weight_name in self.weight_names.intersection(events.fields):
+ # create a weight vector starting with ones
+ norm_weight_per_pid = np.ones(len(events), dtype=np.float32)
+
+ # fill weights with a new mask per unique process id (mostly just one)
+ for pid in self.unique_process_ids:
+ pid_mask = events.process_id == pid
+ norm_weight_per_pid[pid_mask] = self.ratio_per_pid[weight_name][pid]
+
+ # multiply with actual weight
+ norm_weight_per_pid = norm_weight_per_pid * events[weight_name]
+
+ # store it
+ norm_weight_per_pid = ak.values_astype(norm_weight_per_pid, np.float32)
+ events = set_ak_column(events, f"normalized_{weight_name}", norm_weight_per_pid)
+
+
+ return events
+
+ @normalized_weight.init
+ def normalized_weight_init(self: Producer) -> None:
+ """
+ Store the weight producers on the instance, collect the weight names to normalize and
+ declare the corresponding ``normalized_<weight>`` columns as produced.
+ """
+ self.weight_producers = weight_producers
+
+ # resolve weight names: used columns containing "weight" but neither "normalized" nor "btag"
+ self.weight_names = set()
+ for col in self.used_columns:
+ col = col.string_nano_column
+ if "weight" in col and "normalized" not in col and "btag" not in col:
+ self.weight_names.add(col)
+
+ self.produces |= set(f"normalized_{weight_name}" for weight_name in self.weight_names)
+
+ @normalized_weight.requires
+ def normalized_weight_requires(self: Producer, reqs: dict) -> None:
+ """
+ Add the MergeSelectionStats task to *reqs* under the key ``selection_stats``.
+ """
+ from columnflow.tasks.selection import MergeSelectionStats
+ reqs["selection_stats"] = MergeSelectionStats.req(
+ self.task,
+ tree_index=0,
+ branch=-1,
+ _exclude=MergeSelectionStats.exclude_params_forest_merge,
+ )
+
+ @normalized_weight.setup
+ def normalized_weight_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None:
+ """
+ Load the selection stats (JSON) from *inputs* and set ``self.unique_process_ids`` and
+ ``self.ratio_per_pid`` (weight name -> pid -> ratio).
+ """
+ # load the selection stats
+ stats = inputs["selection_stats"]["collection"][0]["stats"].load(formatter="json")
+
+ # get the unique process ids in that dataset
+ key = "sum_mc_weight_per_process"
+ self.unique_process_ids = list(map(int, stats[key].keys()))
+
+ # helper to get numerators and denominators
+ # (each helper defines its own local `key`; pids missing in the stats count as 0.0)
+ def numerator_per_pid(pid):
+ key = "sum_mc_weight_per_process"
+ return stats[key].get(str(pid), 0.0)
+
+ def denominator_per_pid(weight_name, pid):
+ key = f"sum_mc_weight_{weight_name}_per_process"
+ return stats[key].get(str(pid), 0.0)
+
+ # extract the ratio per weight and pid
+ self.ratio_per_pid = {
+ weight_name: {
+ pid: safe_div(numerator_per_pid(pid), denominator_per_pid(weight_name, pid))
+ for pid in self.unique_process_ids
+ }
+ for weight_name in self.weight_names
+ }
+
+ return normalized_weight
\ No newline at end of file
diff --git a/analysis_templates/ghent_template/__cf_module_name__/production/weights.py b/analysis_templates/ghent_template/__cf_module_name__/production/weights.py
new file mode 100644
index 000000000..e435ae266
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/production/weights.py
@@ -0,0 +1,192 @@
+# coding: utf-8
+
+"""
+Column production methods related to generic event weights.
+"""
+
+from columnflow.util import maybe_import
+from columnflow.columnar_util import set_ak_column, has_ak_column, Route
+from columnflow.selection import SelectionResult
+from columnflow.production import Producer, producer
+from columnflow.production.cms.pileup import pu_weight
+from columnflow.production.normalization import normalization_weights
+from columnflow.production.cms.electron import electron_weights
+from columnflow.production.cms.muon import muon_weights
+from columnflow.production.cms.btag import btag_weights
+from columnflow.production.cms.scale import murmuf_weights, murmuf_envelope_weights
+from columnflow.production.cms.pdf import pdf_weights
+from __cf_short_name_lc__.production.normalized_weights import normalized_weight_factory
+from __cf_short_name_lc__.production.normalized_btag import normalized_btag_weights
+
+np = maybe_import("numpy")
+ak = maybe_import("awkward")
+
+
+@producer(
+ produces={"event_weight"},
+ mc_only=True,
+)
+def event_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
+ """
+ Producer that calculates the 'final' event weight (as done in cf.CreateHistograms).
+
+ Starting from ones, the weight is multiplied by every column listed in the config's
+ ``event_weights`` and by every column listed in the dataset's ``event_weights`` that exists in
+ *events*. The product is stored in the column ``event_weight``.
+ """
+ weight = ak.Array(np.ones(len(events)))
+ if self.dataset_inst.is_mc:
+ # weights defined for all datasets in the config
+ for column in self.config_inst.x.event_weights:
+ weight = weight * Route(column).apply(events)
+ # dataset-specific weights, skipped with a warning when the column is missing
+ for column in self.dataset_inst.x("event_weights", []):
+ if has_ak_column(events, column):
+ weight = weight * Route(column).apply(events)
+ else:
+ # NOTE(review): relies on the producer instance providing a `logger` attribute - confirm
+ self.logger.warning_once(
+ f"missing_dataset_weight_{column}",
+ f"weight '{column}' for dataset {self.dataset_inst.name} not found",
+ )
+
+ events = set_ak_column(events, "event_weight", weight)
+
+ return events
+
+
+@event_weight.init
+def event_weight_init(self: Producer) -> None:
+    """
+    Register the weight columns of the config and of the dataset as used columns. Nothing is
+    done as long as no dataset instance is attached.
+    """
+    if not getattr(self, "dataset_inst", None):
+        return
+
+    config_weights = self.config_inst.x.event_weights
+    self.uses |= set(config_weights.keys())
+
+    dataset_weights = self.dataset_inst.x("event_weights", {})
+    self.uses |= set(dataset_weights.keys())
+
+
+@producer(
+    # the scale and pdf weight producers are added per dataset in the init function, depending
+    # on the "skip_scale" and "skip_pdf" tags; listing them here unconditionally would request
+    # their input columns even for datasets that are tagged to skip them
+    uses={
+        pu_weight,
+        # btag_weights,  # TODO: compute btag SF weights (this one applies shape correction)
+    },
+    # don't save btag_weights to save storage space, since we can reproduce them in ProduceColumns
+    produces={pu_weight},
+    mc_only=True,
+)
+def event_weights_to_normalize(self: Producer, events: ak.Array, results: SelectionResult, **kwargs) -> ak.Array:
+    """
+    Wrapper of several event weight producers that are typically called as part of SelectEvents
+    since it is required to normalize them before applying certain event selections.
+
+    Pileup weights are always computed. Scale weights are skipped for datasets tagged
+    "skip_scale", pdf weights for datasets tagged "skip_pdf". *results* is currently not used.
+    """
+    # compute pu weights
+    events = self[pu_weight](events, **kwargs)
+
+    # TODO: compute btag SF weights (for renormalization tasks)
+    # btag_weights works for btagging shape corrections. Not for wp.
+    # events = self[btag_weights](events, jet_mask=results.aux["jet_mask"], **kwargs)
+
+    # skip scale/pdf weights for some datasets (missing columns)
+    if not self.dataset_inst.has_tag("skip_scale"):
+        # compute scale weights
+        events = self[murmuf_envelope_weights](events, **kwargs)
+
+        # read out mur and muf weights
+        events = self[murmuf_weights](events, **kwargs)
+
+    if not self.dataset_inst.has_tag("skip_pdf"):
+        # compute pdf weights
+        events = self[pdf_weights](
+            events,
+            outlier_action="remove",
+            outlier_log_mode="warning",
+            **kwargs,
+        )
+
+    return events
+
+
+@event_weights_to_normalize.init
+def event_weights_to_normalize_init(self) -> None:
+    """
+    Add the scale and pdf weight producers to the used and produced columns unless the dataset
+    carries the tag "skip_scale" or "skip_pdf", respectively. Nothing is done as long as no
+    dataset instance is attached.
+    """
+    dataset_inst = getattr(self, "dataset_inst", None)
+    if not dataset_inst:
+        return
+
+    if not dataset_inst.has_tag("skip_scale"):
+        scale_producers = {murmuf_envelope_weights, murmuf_weights}
+        self.uses |= scale_producers
+        self.produces |= scale_producers
+
+    if not dataset_inst.has_tag("skip_pdf"):
+        pdf_producers = {pdf_weights}
+        self.uses |= pdf_producers
+        self.produces |= pdf_producers
+
+
+# producer that normalizes the weight columns of the two scale weight producers
+normalized_scale_weights = normalized_weight_factory(
+ producer_name="normalized_scale_weights",
+ weight_producers={murmuf_envelope_weights, murmuf_weights},
+)
+
+# producer that normalizes the weight columns of the pdf weight producer
+normalized_pdf_weights = normalized_weight_factory(
+ producer_name="normalized_pdf_weights",
+ weight_producers={pdf_weights},
+)
+
+# producer that normalizes the weight columns of the pileup weight producer
+normalized_pu_weights = normalized_weight_factory(
+ producer_name="normalized_pu_weights",
+ weight_producers={pu_weight},
+)
+
+
+@producer(
+    uses={
+        normalization_weights, electron_weights, muon_weights, btag_weights,
+        normalized_btag_weights,
+        normalized_pu_weights,
+        event_weight,
+    },
+    produces={
+        normalization_weights, electron_weights, muon_weights,
+        normalized_btag_weights,
+        normalized_pu_weights,
+        event_weight,
+    },
+    mc_only=True,
+)
+def event_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
+    """
+    Wrapper of several event weight producers that are typically called in ProduceColumns.
+
+    Normalized scale and pdf weights are skipped for datasets tagged "skip_scale" and
+    "skip_pdf", respectively. The combined event weight is computed last.
+    """
+    # weights that are computed for every dataset, in this order: normalization, btag SF,
+    # electron and muon SF, then the weights that are normalized using the selection stats
+    always_run = (
+        normalization_weights,
+        btag_weights,
+        electron_weights,
+        muon_weights,
+        normalized_btag_weights,
+        normalized_pu_weights,
+    )
+    for weight_producer in always_run:
+        events = self[weight_producer](events, **kwargs)
+
+    # optional normalized weights, depending on the dataset tags
+    dataset_inst = self.dataset_inst
+    if not dataset_inst.has_tag("skip_scale"):
+        events = self[normalized_scale_weights](events, **kwargs)
+
+    if not dataset_inst.has_tag("skip_pdf"):
+        events = self[normalized_pdf_weights](events, **kwargs)
+
+    # calculate the full event weight for plotting purposes
+    return self[event_weight](events, **kwargs)
+
+
+@event_weights.init
+def event_weights_init(self: Producer) -> None:
+    """
+    Add the normalized scale and pdf weight producers to the used and produced columns unless
+    the dataset carries the tag "skip_scale" or "skip_pdf", respectively. Nothing is done as
+    long as no dataset instance is attached.
+    """
+    dataset_inst = getattr(self, "dataset_inst", None)
+    if not dataset_inst:
+        return
+
+    if not dataset_inst.has_tag("skip_scale"):
+        scale_producers = {normalized_scale_weights}
+        self.uses |= scale_producers
+        self.produces |= scale_producers
+
+    if not dataset_inst.has_tag("skip_pdf"):
+        pdf_producers = {normalized_pdf_weights}
+        self.uses |= pdf_producers
+        self.produces |= pdf_producers
diff --git a/analysis_templates/ghent_template/__cf_module_name__/selection/__init__.py b/analysis_templates/ghent_template/__cf_module_name__/selection/__init__.py
new file mode 100644
index 000000000..57d631c3f
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/selection/__init__.py
@@ -0,0 +1 @@
+# coding: utf-8
diff --git a/analysis_templates/ghent_template/__cf_module_name__/selection/categories.py b/analysis_templates/ghent_template/__cf_module_name__/selection/categories.py
new file mode 100644
index 000000000..b49236b92
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/selection/categories.py
@@ -0,0 +1,69 @@
+"""
+Selection methods defining categories based on selection step results.
+"""
+
+from columnflow.util import maybe_import
+from columnflow.categorization import Categorizer, categorizer
+from columnflow.selection import SelectionResult
+
+np = maybe_import("numpy")
+ak = maybe_import("awkward")
+
+
+@categorizer(uses={"event"}, call_force=True)
+def catid_selection_incl(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]:
+    """Inclusive category: the mask is true for every event."""
+    all_true = ak.ones_like(events.event) > 0
+    return events, all_true
+
+#
+# Categorizers called as part of cf.SelectEvents
+#
+
+
+@categorizer(uses={"event"}, call_force=True)
+def catid_selection_2e(
+    self: Categorizer, events: ak.Array, results: SelectionResult, **kwargs,
+) -> tuple[ak.Array, ak.Array]:
+    """Category mask: exactly two entries in the Electron and none in the Muon objects of *results*."""
+    n_electron = ak.num(results.objects.Electron.Electron, axis=-1)
+    n_muon = ak.num(results.objects.Muon.Muon, axis=-1)
+    return events, (n_electron == 2) & (n_muon == 0)
+
+
+@categorizer(uses={"event"}, call_force=True)
+def catid_selection_1e1mu(
+    self: Categorizer, events: ak.Array, results: SelectionResult, **kwargs,
+) -> tuple[ak.Array, ak.Array]:
+    """Category mask: exactly one entry each in the Electron and Muon objects of *results*."""
+    n_electron = ak.num(results.objects.Electron.Electron, axis=-1)
+    n_muon = ak.num(results.objects.Muon.Muon, axis=-1)
+    return events, (n_electron == 1) & (n_muon == 1)
+
+
+@categorizer(uses={"event"}, call_force=True)
+def catid_selection_2mu(
+    self: Categorizer, events: ak.Array, results: SelectionResult, **kwargs,
+) -> tuple[ak.Array, ak.Array]:
+    """Category mask: no entry in the Electron and exactly two in the Muon objects of *results*."""
+    n_electron = ak.num(results.objects.Electron.Electron, axis=-1)
+    n_muon = ak.num(results.objects.Muon.Muon, axis=-1)
+    return events, (n_electron == 0) & (n_muon == 2)
+
+#
+# Categorizers called as part of cf.ProduceColumns
+#
+
+
+@categorizer(uses={"Electron.pt", "Muon.pt"}, call_force=True)
+def catid_2e(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]:
+    """Category mask: exactly two electrons and no muon, counting objects with pt > 0."""
+    n_electron = ak.sum(events.Electron.pt > 0, axis=-1)
+    n_muon = ak.sum(events.Muon.pt > 0, axis=-1)
+    return events, (n_electron == 2) & (n_muon == 0)
+
+
+@categorizer(uses={"Electron.pt", "Muon.pt"}, call_force=True)
+def catid_1e1mu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]:
+    """Category mask: exactly one electron and one muon, counting objects with pt > 0."""
+    n_electron = ak.sum(events.Electron.pt > 0, axis=-1)
+    n_muon = ak.sum(events.Muon.pt > 0, axis=-1)
+    return events, (n_electron == 1) & (n_muon == 1)
+
+
+@categorizer(uses={"Electron.pt", "Muon.pt"}, call_force=True)
+def catid_2mu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]:
+    """Category mask: no electron and exactly two muons, counting objects with pt > 0."""
+    n_electron = ak.sum(events.Electron.pt > 0, axis=-1)
+    n_muon = ak.sum(events.Muon.pt > 0, axis=-1)
+    return events, (n_electron == 0) & (n_muon == 2)
diff --git a/analysis_templates/ghent_template/__cf_module_name__/selection/default.py b/analysis_templates/ghent_template/__cf_module_name__/selection/default.py
new file mode 100644
index 000000000..39887f517
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/selection/default.py
@@ -0,0 +1,249 @@
+# coding: utf-8
+
+"""
+Selection modules for __cf_short_name_lc__.
+"""
+
+from collections import defaultdict
+from typing import Tuple
+
+import law
+
+from columnflow.util import maybe_import, four_vec
+from columnflow.columnar_util import set_ak_column, optional_column, has_ak_column
+from columnflow.production.util import attach_coffea_behavior
+
+from columnflow.selection import Selector, SelectionResult, selector
+from columnflow.selection.util import masked_sorted_indices
+
+from columnflow.production.cms.mc_weight import mc_weight
+from columnflow.production.categories import category_ids
+from columnflow.production.processes import process_ids
+
+from __cf_short_name_lc__.production.weights import event_weights_to_normalize
+from __cf_short_name_lc__.production.cutflow_features import cutflow_features
+
+from __cf_short_name_lc__.selection.objects import object_selection
+from __cf_short_name_lc__.selection.stats import __cf_short_name_lc___increment_stats
+from __cf_short_name_lc__.selection.trigger import trigger_selection
+
+np = maybe_import("numpy")
+ak = maybe_import("awkward")
+coffea = maybe_import("coffea")
+maybe_import("coffea.nanoevents.methods.nanoaod")
+
+logger = law.logger.get_logger(__name__)
+
+
+def TetraVec(arr: ak.Array) -> ak.Array:
+    """Zip the pt, eta, phi and mass fields of *arr* into a PtEtaPhiMLorentzVector array."""
+    fields = {"pt": arr.pt, "eta": arr.eta, "phi": arr.phi, "mass": arr.mass}
+    behavior = coffea.nanoevents.methods.vector.behavior
+    return ak.zip(fields, with_name="PtEtaPhiMLorentzVector", behavior=behavior)
+
+
+@selector(
+    uses={
+        process_ids, attach_coffea_behavior, mc_weight, optional_column("veto"),
+    },
+    produces={
+        process_ids, attach_coffea_behavior, mc_weight
+    },
+    exposed=False,
+)
+def pre_selection(
+    self: Selector,
+    events: ak.Array,
+    stats: defaultdict,
+    **kwargs,
+) -> Tuple[ak.Array, SelectionResult]:
+    """Add mc weights (simulation only) and process ids, and start the event mask from the optional veto."""
+    if self.dataset_inst.is_mc:
+        events = self[mc_weight](events, **kwargs)
+
+    # create process ids
+    events = self[process_ids](events, **kwargs)
+    # ensure coffea behavior
+    events = self[attach_coffea_behavior](events, **kwargs)
+    # all-true fallback built from the event count, since "mc_weight" is only produced above for simulation
+    results = SelectionResult()
+    results.event = ~events.veto if has_ak_column(events, "veto") else ak.Array(np.ones(len(events), dtype=bool))
+    return events, results
+
+
+@selector(
+    uses=four_vec(
+        ("Electron", "Muon"), ("charge", "pdgId", "tight"),
+    ),
+    triggers=None
+)
+def lepton_selection(
+    self: Selector,
+    events: ak.Array,
+    results: SelectionResult,
+    stats: defaultdict,
+    **kwargs,
+) -> Tuple[ak.Array, SelectionResult]:
+    """Event-level dilepton selection on the leptons passing the object selection (step "Lepton")."""
+    # apply the object selection from results
+    electron = (events.Electron[results.objects.Electron.Electron])
+    muon = (events.Muon[results.objects.Muon.Muon])
+
+    # create new object: leptons (muons and electrons merged, sorted by descending pt)
+    lepton = ak.concatenate([muon, electron], axis=-1)
+    lepton = lepton[ak.argsort(lepton.pt, axis=-1, ascending=False)]
+
+    # pad to at least two leptons per event; the placeholder values make padded entries fail the pt and tight cuts
+    fill_with = {
+        "pt": -999, "eta": -999, "phi": -999, "charge": -999,
+        "pdgId": -999, "mass": -999, "sip3d": -999, 'tight': False,
+    }
+    lepton = ak.fill_none(ak.pad_none(lepton, 2, axis=-1), fill_with)
+
+    # Z-boson candidate mask from the two leading leptons: opposite charge, same flavour, |mll - 91| < 15
+    mll = (TetraVec(lepton[:, 0]) + TetraVec(lepton[:, 1])).mass
+    z_mask = (
+        (lepton[:, 0].charge != lepton[:, 1].charge) &
+        (abs(lepton[:, 0].pdgId) == abs(lepton[:, 1].pdgId)) &
+        (abs(mll - 91) < 15)
+    )
+
+    lepton_mask = (
+        (lepton.pt[:, 0] > 30) &
+        (lepton.pt[:, 1] > 20) &
+        (~z_mask) &  # no Z-boson peak leptons
+        (ak.all(lepton.tight, axis=-1))  # all loose leptons in the event must be tight
+    )
+
+    return events, SelectionResult(
+        steps={
+            "Lepton": lepton_mask,
+        },
+        objects={},
+        aux={
+            # save the selected lepton for the duration of the selection
+            # (pt-sorted and padded to at least two entries per event, see above)
+            "lepton": lepton,
+        },
+    )
+
+
+@selector(
+    uses=(four_vec("Jet", ("btagDeepFlavB"))),
+    exposed=False,
+)
+def jet_selection(
+    self: Selector,
+    events: ak.Array,
+    results: SelectionResult,
+    stats: defaultdict,
+    **kwargs,
+) -> Tuple[ak.Array, SelectionResult]:
+    """Require at least one selected jet above the medium deepjet b-tag working point (step "Jet")."""
+    jet = (events.Jet[results.objects.Jet.Jet])
+    # per-jet mask: b-tag score at or above the medium working point stored in the config
+    bjet_mask_medium = (jet.btagDeepFlavB >= self.config_inst.x.btag_working_points.deepjet.medium)
+    # per-event mask: at least one jet passing the b-tag mask
+    jet_event_mask = (ak.sum(bjet_mask_medium, axis=-1) >= 1)
+
+    return events, SelectionResult(
+        steps={
+            "Jet": jet_event_mask,
+        },
+    )
+
+
+@selector(
+    uses={
+        category_ids, __cf_short_name_lc___increment_stats
+    },
+    produces={
+        category_ids, __cf_short_name_lc___increment_stats
+    },
+    exposed=False,
+)
+def post_selection(
+    self: Selector,
+    events: ak.Array,
+    results: SelectionResult,
+    stats: defaultdict,
+    **kwargs,
+) -> Tuple[ak.Array, SelectionResult]:
+    # build categories
+    events = self[category_ids](events, results=results, **kwargs)
+    # add cutflow features (only if enabled via the "do_cutflow_features" config aux entry)
+    if self.config_inst.x("do_cutflow_features", False):
+        events = self[cutflow_features](events, results=results, **kwargs)
+
+    # produce event weights (simulation only)
+    if self.dataset_inst.is_mc:
+        events = self[event_weights_to_normalize](events, results=results, **kwargs)
+
+    # increment stats (the selector hands back the events it received, so its return value is not needed)
+    self[__cf_short_name_lc___increment_stats](events, results, stats, **kwargs)
+
+    return events, results
+
+
+@post_selection.init
+def post_selection_init(self: Selector) -> None:
+    if self.config_inst.x("do_cutflow_features", False):
+        self.uses.add(cutflow_features)
+        self.produces.add(cutflow_features)
+    # event weights are registered for simulation only: stop if the dataset is not known yet or is data
+    if not getattr(self, "dataset_inst", None) or self.dataset_inst.is_data:
+        return
+
+    self.uses.add(event_weights_to_normalize)
+    self.produces.add(event_weights_to_normalize)
+
+
+@selector(
+    uses={
+        pre_selection, post_selection,
+        object_selection, trigger_selection, lepton_selection, jet_selection,
+    },
+    produces={
+        pre_selection, post_selection,
+        object_selection, trigger_selection, lepton_selection, jet_selection,
+    },
+    exposed=True,
+)
+def default(
+    self: Selector,
+    events: ak.Array,
+    stats: defaultdict,
+    **kwargs,
+) -> tuple[ak.Array, SelectionResult]:
+    """Default selection: pre-selection, trigger, object, lepton and jet steps, followed by post-selection."""
+
+    # pre-selection, creates the selection results that are updated at every step
+    events, results = self[pre_selection](events, stats, **kwargs)
+
+    # apply trigger selection (with double counting removal for data)
+    events, trigger_results = self[trigger_selection](events, **kwargs)
+    results += trigger_results
+
+    # apply object selection
+    events, object_results = self[object_selection](events, stats, **kwargs)
+    results += object_results
+
+    # apply lepton event selection
+    events, lepton_selection_results = self[lepton_selection](events, results, stats, **kwargs)
+    results += lepton_selection_results
+
+    # apply jet event selection
+    events, jet_selection_results = self[jet_selection](events, results, stats, **kwargs)
+    results += jet_selection_results
+
+    # combine event selection after all steps; the b-tag requirement is part of the "Jet" step,
+    # a separate "Bjet" step is not produced by any of the selectors called above
+    results.event = (results.event &
+                     results.steps.Trigger &
+                     results.steps.Lepton &
+                     results.steps.Jet)
+
+    # add categories, optional cutflow features and event weights, and increment the stats
+    events, results = self[post_selection](events, results, stats, **kwargs)
+
+    return events, results
diff --git a/analysis_templates/ghent_template/__cf_module_name__/selection/example.py b/analysis_templates/ghent_template/__cf_module_name__/selection/example.py
new file mode 100644
index 000000000..60e8041e7
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/selection/example.py
@@ -0,0 +1,166 @@
+# coding: utf-8
+
+"""
+Exemplary selection methods.
+"""
+
+from collections import defaultdict
+
+from columnflow.selection import Selector, SelectionResult, selector
+from columnflow.selection.stats import increment_stats
+from columnflow.selection.util import sorted_indices_from_mask
+from columnflow.production.processes import process_ids
+from columnflow.production.cms.mc_weight import mc_weight
+from columnflow.util import maybe_import
+
+from __cf_module_name__.production.example import cutflow_features
+
+np = maybe_import("numpy")
+ak = maybe_import("awkward")
+
+
+#
+# other unexposed selectors
+# (not selectable from the command line but used by other, exposed selectors)
+#
+
+
+@selector(
+    uses={"Muon.pt", "Muon.eta"},
+)
+def muon_selection(
+    self: Selector,
+    events: ak.Array,
+    **kwargs,
+) -> tuple[ak.Array, SelectionResult]:
+    # example muon selection: exactly one muon with pt >= 20 and |eta| < 2.1
+    muon_mask = (events.Muon.pt >= 20.0) & (abs(events.Muon.eta) < 2.1)
+    muon_sel = ak.sum(muon_mask, axis=1) == 1
+
+    # build and return selection results
+    # "objects" maps source columns to new columns and selections to be applied on the old columns
+    # to create them, e.g. {"Muon": {"MySelectedMuon": indices_applied_to_Muon}}
+    return events, SelectionResult(
+        steps={
+            "muon": muon_sel,
+        },
+        objects={
+            "Muon": {
+                "Muon": muon_mask,
+            },
+        },
+    )
+
+
+@selector(
+    uses={"Jet.pt", "Jet.eta"},
+)
+def jet_selection(
+    self: Selector,
+    events: ak.Array,
+    **kwargs,
+) -> tuple[ak.Array, SelectionResult]:
+    # example jet selection: at least one jet with pt >= 25 and |eta| < 2.4
+    jet_mask = (events.Jet.pt >= 25.0) & (abs(events.Jet.eta) < 2.4)
+    jet_sel = ak.sum(jet_mask, axis=1) >= 1
+
+    # build and return selection results
+    # "objects" maps source columns to new columns and selections to be applied on the old columns
+    # to create them, e.g. {"Jet": {"MyCustomJetCollection": indices_applied_to_Jet}}
+    return events, SelectionResult(
+        steps={
+            "jet": jet_sel,
+        },
+        objects={
+            "Jet": {
+                "Jet": sorted_indices_from_mask(jet_mask, events.Jet.pt, ascending=False),
+            },
+        },
+        aux={
+            "n_jets": ak.sum(jet_mask, axis=1),  # number of jets passing the mask, per event
+        },
+    )
+
+
+#
+# exposed selectors
+# (those that can be invoked from the command line)
+#
+
+@selector(
+    uses={
+        # selectors / producers called within _this_ selector
+        mc_weight, cutflow_features, process_ids, muon_selection, jet_selection,
+        increment_stats,
+    },
+    produces={
+        # selectors / producers whose newly created columns should be kept
+        mc_weight, cutflow_features, process_ids,
+    },
+    exposed=True,
+)
+def example(
+    self: Selector,
+    events: ak.Array,
+    stats: defaultdict,
+    **kwargs,
+) -> tuple[ak.Array, SelectionResult]:
+    # prepare the selection results that are updated at every step
+    results = SelectionResult()
+
+    # muon selection
+    events, muon_results = self[muon_selection](events, **kwargs)
+    results += muon_results
+
+    # jet selection
+    events, jet_results = self[jet_selection](events, **kwargs)
+    results += jet_results
+
+    # combined event selection after all steps
+    results.event = results.steps.muon & results.steps.jet
+
+    # create process ids
+    events = self[process_ids](events, **kwargs)
+
+    # add the mc weight (simulation only)
+    if self.dataset_inst.is_mc:
+        events = self[mc_weight](events, **kwargs)
+
+    # add cutflow features, passing per-object masks
+    events = self[cutflow_features](events, results.objects, **kwargs)
+
+    # increment stats: plain event counts always, mc weight sums and groups for simulation only
+    weight_map = {
+        "num_events": Ellipsis,
+        "num_events_selected": results.event,
+    }
+    group_map = {}
+    if self.dataset_inst.is_mc:
+        weight_map = {
+            **weight_map,
+            # mc weight for all events and for selected events
+            "sum_mc_weight": (events.mc_weight, Ellipsis),
+            "sum_mc_weight_selected": (events.mc_weight, results.event),
+        }
+        group_map = {
+            # per process
+            "process": {
+                "values": events.process_id,
+                "mask_fn": (lambda v: events.process_id == v),
+            },
+            # per jet multiplicity
+            "njet": {
+                "values": results.x.n_jets,
+                "mask_fn": (lambda v: results.x.n_jets == v),
+            },
+        }
+    events, results = self[increment_stats](
+        events,
+        results,
+        stats,
+        weight_map=weight_map,
+        group_map=group_map,
+        **kwargs,
+    )
+
+    return events, results
diff --git a/analysis_templates/ghent_template/__cf_module_name__/selection/objects.py b/analysis_templates/ghent_template/__cf_module_name__/selection/objects.py
new file mode 100644
index 000000000..b9d7e2e09
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/selection/objects.py
@@ -0,0 +1,194 @@
+# coding: utf-8
+
+"""
+Selection modules for object selection of Muon, Electron, and Jet.
+"""
+
+from collections import defaultdict
+from typing import Tuple
+
+import law
+
+from columnflow.util import maybe_import, four_vec
+from columnflow.columnar_util import set_ak_column
+from columnflow.production.util import attach_coffea_behavior
+from columnflow.selection import Selector, SelectionResult, selector
+from columnflow.selection.util import masked_sorted_indices
+
+ak = maybe_import("awkward")
+
+
+def masked_sorted_indices(mask: ak.Array, sort_var: ak.Array, ascending: bool = False) -> ak.Array:
+    """
+    Return the argsort indices of *sort_var* (last axis) that pass *mask*; shadows the columnflow import above.
+    """
+    indices = ak.argsort(sort_var, axis=-1, ascending=ascending)
+    return indices[mask[indices]]
+
+
+@selector(
+    uses=four_vec(
+        ("Muon"),
+        ("sip3d", "dxy", "dz", "miniPFRelIso_all", "tightId")
+    ) | {"event"},
+    triggers=None
+)
+def muon_object(
+    self: Selector,
+    events: ak.Array,
+    stats: defaultdict,
+    **kwargs,
+) -> Tuple[ak.Array, SelectionResult]:
+    """Select loose muons (pt-sorted indices) and flag the tight ones in a new "Muon.tight" column."""
+    muon = (events.Muon)
+
+    # loose object muon mask
+    mu_mask = (
+        (abs(muon.eta) < 2.4) &
+        (muon.pt > 10.) &
+        (muon.miniPFRelIso_all < 0.4) &
+        (muon.sip3d < 8) &
+        (abs(muon.dxy) < 0.05) &
+        (abs(muon.dz) < 0.1)
+    )
+
+    # tight object muon mask (tight cutbased ID)
+    mu_mask_tight = (
+        (mu_mask) &
+        (muon.tightId)
+    )
+    # the tight flag is stored for all muons, the object selection below keeps the loose ones
+    events = set_ak_column(events, "Muon.tight", mu_mask_tight, value_type=bool)
+
+    return events, SelectionResult(
+        steps={},
+        objects={
+            "Muon": {
+                "Muon": masked_sorted_indices(mu_mask, muon.pt)
+            }
+        },
+    )
+
+
+@selector(
+    uses=four_vec(
+        ("Electron"),
+        ("sip3d", "charge", "isPFcand", "dxy", "dz", "miniPFRelIso_all", "mvaFall17V2Iso_WP90", "tightCharge",
+         "lostHits", "convVeto")
+    ) | four_vec(
+        ("Muon"),
+    ),
+    triggers=None
+)
+def electron_object(
+    self: Selector,
+    events: ak.Array,
+    results: SelectionResult,
+    stats: defaultdict,
+    **kwargs,
+) -> Tuple[ak.Array, SelectionResult]:
+    """Select loose electrons not overlapping with loose muons and flag the tight ones in "Electron.tight"."""
+    electron = (events.Electron)
+    # loose muons from the muon object selection in *results*, used to veto electrons that coincide with them
+    muon = (events.Muon[results.objects.Muon.Muon])
+
+    # loose object electron mask
+    e_mask = (
+        (abs(electron.eta) < 2.5) &
+        (electron.pt > 15) &
+        (electron.miniPFRelIso_all < 0.4) &
+        (electron.sip3d < 8) &
+        (abs(electron.dxy) < 0.05) &
+        (abs(electron.dz) < 0.1) &
+        (electron.lostHits < 2) &
+        (electron.isPFcand) &
+        (electron.convVeto) &
+        (electron.tightCharge > 1) &
+        # remove electrons that have a loose muon within the nearest() threshold of 0.05
+        (ak.is_none(electron.nearest(muon, threshold=0.05), axis=-1))
+    )
+    # tight object electron mask (mvaFall17 WP90)
+    e_mask_tight = (
+        (e_mask) &
+        (electron.mvaFall17V2Iso_WP90)
+    )
+
+    events = set_ak_column(events, "Electron.tight", e_mask_tight, value_type=bool)
+
+    return events, SelectionResult(
+        steps={},
+        objects={
+            "Electron": {
+                "Electron": masked_sorted_indices(e_mask, electron.pt)
+            }
+        },
+    )
+
+
+@selector(
+    uses=(four_vec({"Electron", "Muon"}) | four_vec("Jet", ("jetId", "btagDeepFlavB"))),
+    exposed=False,
+)
+def jet_object(
+    self: Selector,
+    events: ak.Array,
+    results: SelectionResult,
+    stats: defaultdict,
+    **kwargs,
+) -> Tuple[ak.Array, SelectionResult]:
+    """Select jets passing pt, eta and jetId cuts that do not overlap with the selected muons and electrons."""
+    jet = (events.Jet)
+    muon = (events.Muon)[results.objects.Muon.Muon]
+    electron = (events.Electron)[results.objects.Electron.Electron]
+    # keep jets that have neither a selected muon nor a selected electron within the nearest() threshold of 0.4
+    dR_mask = (
+        (ak.is_none(jet.nearest(muon, threshold=0.4), axis=-1)) &
+        (ak.is_none(jet.nearest(electron, threshold=0.4), axis=-1))
+    )
+
+    jet_mask = (
+        (jet.pt > 30) &
+        (abs(jet.eta) < 2.5) &
+        (jet.jetId >= 2) &
+        (dR_mask)
+    )
+    # pt-sorted indices of the selected jets and their multiplicity per event
+    jet_indices = masked_sorted_indices(jet_mask, events.Jet.pt)
+    n_jets = ak.sum(jet_mask, axis=-1)
+
+    return events, SelectionResult(
+        steps={},
+        objects={
+            "Jet": {
+                "Jet": jet_indices,
+            },
+        },
+        aux={
+            "jet_mask": jet_mask,
+            "n_jets": n_jets,
+        },
+    )
+
+
+@selector(
+    uses=(muon_object, electron_object, jet_object),
+    exposed=False,
+)
+def object_selection(
+    self: Selector,
+    events: ak.Array,
+    stats: defaultdict,
+    **kwargs,
+) -> Tuple[ak.Array, SelectionResult]:
+    # apply muon object selection first, its results are read by the electron and jet object selections
+    events, results = self[muon_object](events, stats, **kwargs)
+
+    # apply electron object selection (reads the selected muons from the results)
+    events, electron_results = self[electron_object](events, results, stats, **kwargs)
+    results += electron_results
+
+    # apply jet object selection (reads the selected muons and electrons from the results)
+    events, jet_results = self[jet_object](events, results, stats, **kwargs)
+    results += jet_results
+
+    return events, results
diff --git a/analysis_templates/ghent_template/__cf_module_name__/selection/stats.py b/analysis_templates/ghent_template/__cf_module_name__/selection/stats.py
new file mode 100644
index 000000000..dbabb7219
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/selection/stats.py
@@ -0,0 +1,105 @@
+# coding: utf-8
+
+"""
+Stat-related methods.
+"""
+from __future__ import annotations
+
+import functools
+
+from columnflow.selection import Selector, SelectionResult, selector
+from columnflow.selection.stats import increment_stats
+from columnflow.production import Producer, producer
+from columnflow.production.cms.btag import btag_weights
+from __cf_short_name_lc__.production.weights import event_weights_to_normalize
+
+from columnflow.util import maybe_import
+from columnflow.columnar_util import optional_column, has_ak_column
+from columnflow.ml import MLModel
+
+np = maybe_import("numpy")
+ak = maybe_import("awkward")
+
+
+@selector(
+    uses={
+        increment_stats,
+        event_weights_to_normalize,
+        optional_column("veto"),
+    },
+)
+def __cf_short_name_lc___increment_stats(
+    self: Selector,
+    events: ak.Array,
+    results: SelectionResult,
+    stats: dict,
+    **kwargs,
+) -> ak.Array:
+    # collect important information from the results; without a "veto" column all events count as unvetoed
+    unvetoed_mask = ~events.veto if has_ak_column(events, "veto") else Ellipsis
+    event_mask = results.event
+    n_jets = results.x.n_jets
+
+    # weight map definition
+    weight_map = {
+        # "num" operations
+        "num_events": Ellipsis,  # all events
+        "num_events_selected": event_mask,  # selected events only
+    }
+
+    if self.dataset_inst.is_mc:
+        weight_map["num_negative_weights"] = (events.mc_weight < 0) & \
+            (True if unvetoed_mask is Ellipsis else unvetoed_mask)
+        # "sum" operations
+        weight_map["sum_mc_weight"] = (events.mc_weight, unvetoed_mask)  # weights of all unvetoed events
+        weight_map["sum_mc_weight_selected"] = (events.mc_weight, event_mask)  # weights of selected events
+
+        weight_columns = list(
+            set(self[event_weights_to_normalize].produced_columns)
+        )
+        weight_columns = sorted([col.string_nano_column for col in weight_columns])
+
+        # mc weight times correction weight (with variations) for all unvetoed events, without any selection
+        for name in weight_columns:
+            if "weight" not in name:
+                # skip non-weight columns here
+                continue
+
+            weight_map[f"sum_mc_weight_{name}"] = (events.mc_weight * events[name], unvetoed_mask)
+
+            # weights for selected events
+            weight_map[f"sum_mc_weight_{name}_selected"] = (events.mc_weight * events[name], event_mask)
+
+    group_map = {
+        "process": {
+            "values": events.process_id,
+            "mask_fn": (lambda v: events.process_id == v),
+        },
+        "njet": {
+            "values": results.x.n_jets,
+            "mask_fn": (lambda v: n_jets == v),
+        },
+    }
+    # the two groups are additionally passed as one combination
+    group_combinations = [("process", "njet")]
+
+    self[increment_stats](
+        events,
+        results,
+        stats,
+        weight_map=weight_map,
+        group_map=group_map,
+        group_combinations=group_combinations,
+        **kwargs,
+    )
+    # the events received as input are handed back as they are
+    return events
+
+
+@__cf_short_name_lc___increment_stats.init
+def __cf_short_name_lc___increment_stats_init(self: Selector) -> None:
+    if not getattr(self, "dataset_inst", None):
+        return
+    # the mc_weight column is only read for simulation, so it is only requested there
+    if self.dataset_inst.is_mc:
+        self.uses |= {"mc_weight"}
diff --git a/analysis_templates/ghent_template/__cf_module_name__/selection/trigger.py b/analysis_templates/ghent_template/__cf_module_name__/selection/trigger.py
new file mode 100644
index 000000000..38a9075ba
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/selection/trigger.py
@@ -0,0 +1,95 @@
+# coding: utf-8
+from __future__ import annotations
+import order as od
+
+from columnflow.selection import Selector, SelectionResult, selector
+from columnflow.util import maybe_import
+
+np = maybe_import("numpy")
+ak = maybe_import("awkward")
+
+
+def add_triggers(cfg: od.Config, campaign: od.Campaign):  # NOTE: *campaign* is not used in the body
+    cfg.x.trigger_matrix = [
+        (
+            "EGamma", {
+                "Ele32_WPTight_Gsf",
+                "Ele115_CaloIdVT_GsfTrkIdT",
+                "Ele23_Ele12_CaloIdL_TrackIdL_IsoVL",
+                "DoubleEle25_CaloIdL_MW",
+                "Ele16_Ele12_Ele8_CaloIdL_TrackIdL",
+            },
+        ),
+        (
+            "DoubleMuon", {
+                "Mu37_TkMu27",
+                "Mu17_TrkIsoVVL_Mu8_TrkIsoVVL_DZ_Mass3p8",
+            },
+        ),
+        (
+            "MuonEG", {
+                "Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL_DZ",
+                "Mu8_TrkIsoVVL_Ele23_CaloIdL_TrackIdL_IsoVL_DZ",
+                "Mu12_TrkIsoVVL_Ele23_CaloIdL_TrackIdL_IsoVL_DZ",
+                "Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL",
+                "Mu27_Ele37_CaloIdL_MW",
+                "Mu37_Ele27_CaloIdL_MW",
+            },
+        ),
+        (
+            "SingleMuon", {
+                "IsoMu24",
+                "IsoMu27",
+                "Mu50",
+                "OldMu100",
+                "TkMu100",
+            },
+        ),
+    ]
+    # flat set of all trigger names in the matrix, read by trigger_selection_init to request the HLT columns
+    cfg.x.all_triggers = {
+        trigger
+        for _, triggers in cfg.x.trigger_matrix
+        for trigger in triggers
+    }
+
+
+@selector
+def trigger_selection(
+    self: Selector,
+    events: ak.Array,
+    **kwargs,
+) -> tuple[ak.Array, SelectionResult]:
+    """Build the "Trigger" (required fired, no veto fired) and "VetoTrigger" (no veto fired) step masks."""
+    # "Trigger" starts all-false as it is OR-ed below, "VetoTrigger" starts all-true as it is AND-ed below
+    sel_trigger = ak.Array(np.zeros(len(events), dtype=bool))
+    veto_trigger = ak.Array(np.ones(len(events), dtype=bool))
+
+    # pick events that passed one of the required triggers
+    # (both trigger lists are read from the dataset's "require_triggers" / "veto_triggers" aux entries)
+    for trigger in self.dataset_inst.x("require_triggers", []):
+        sel_trigger = sel_trigger | events.HLT[trigger]
+
+    # but reject events that also passed one of the triggers to veto
+    for trigger in self.dataset_inst.x("veto_triggers", []):
+        veto_trigger = veto_trigger & ~events.HLT[trigger]
+        sel_trigger = sel_trigger & ~events.HLT[trigger]
+
+    return events, SelectionResult(
+        steps={
+            "Trigger": sel_trigger, "VetoTrigger": veto_trigger
+        },
+    )
+
+
+@trigger_selection.init
+def trigger_selection_init(self: Selector) -> None:
+    # return immediately if config object has not been loaded yet
+    if not getattr(self, "config_inst", None):
+        return
+
+    # add the HLT bits of all triggers listed in the config's "all_triggers" aux entry to uses
+    self.uses |= {
+        f"HLT.{trigger}"
+        for trigger in self.config_inst.x.all_triggers
+    }
diff --git a/analysis_templates/ghent_template/__cf_module_name__/tasks/__init__.py b/analysis_templates/ghent_template/__cf_module_name__/tasks/__init__.py
new file mode 100644
index 000000000..07e64c984
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/tasks/__init__.py
@@ -0,0 +1,5 @@
+# coding: utf-8
+# flake8: noqa
+
+# provisioning imports
+import __cf_module_name__.tasks.base
diff --git a/analysis_templates/ghent_template/__cf_module_name__/tasks/base.py b/analysis_templates/ghent_template/__cf_module_name__/tasks/base.py
new file mode 100644
index 000000000..555e01838
--- /dev/null
+++ b/analysis_templates/ghent_template/__cf_module_name__/tasks/base.py
@@ -0,0 +1,12 @@
+# coding: utf-8
+
+"""
+Custom base tasks.
+"""
+
+from columnflow.tasks.framework.base import BaseTask
+
+
+class __cf_short_name_uc__Task(BaseTask):
+    # custom base task of the analysis; only sets the task namespace
+    task_namespace = "__cf_short_name_lc__"
diff --git a/analysis_templates/ghent_template/bin/githooks/post-commit b/analysis_templates/ghent_template/bin/githooks/post-commit
new file mode 100755
index 000000000..95aa6ed7e
--- /dev/null
+++ b/analysis_templates/ghent_template/bin/githooks/post-commit
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+# Custom post-commit hook that runs the linter.
+
+action() {
+    # just lint, unless the analysis is not set up or __cf_short_name_uc___SKIP_POST_COMMIT is set to 1
+    if [ ! -d "${__cf_short_name_uc___BASE}" ]; then
+        >&2 echo "__cf_short_name_uc___BASE not setup, skip linting"
+    elif [ "${__cf_short_name_uc___SKIP_POST_COMMIT}" != "1" ]; then
+        echo "post-commit linting ..."
+        bash "${__cf_short_name_uc___BASE}/tests/run_linting"
+        echo
+    fi
+
+    # always end successfully, independent of the linting result
+    return "0"
+}
+action "$@"
diff --git a/analysis_templates/ghent_template/law.cfg b/analysis_templates/ghent_template/law.cfg
new file mode 100644
index 000000000..7948523ba
--- /dev/null
+++ b/analysis_templates/ghent_template/law.cfg
@@ -0,0 +1,167 @@
+[core]
+
+# inherit from the columnflow configuration file
+inherit: $CF_BASE/law.cfg
+
+
+[modules]
+
+columnflow.tasks.cms.inference
+columnflow.tasks.cms.external
+__cf_module_name__.tasks
+
+
+[logging]
+
+law: INFO
+luigi-interface: INFO
+gfal2: WARNING
+columnflow.columnar_util-perf: INFO
+
+
+[target]
+
+tmp_dir: /pnfs/iihe/cms/store/user/$CF_CERN_USER/tmp
+tmp_dir_perm: 777
+
+
+[analysis]
+
+default_analysis: __cf_module_name__.analysis.__cf_short_name_lc__.__cf_short_name_lc__
+default_config: l18
+default_dataset: tt_sl_powheg
+
+calibration_modules: columnflow.calibration.cms.{jets,met}, __cf_module_name__.calibration.{default,jet}
+selection_modules: columnflow.selection.{empty}, columnflow.selection.cms.{json_filter, met_filters}, __cf_module_name__.selection.{default,categories,stats,trigger}
+production_modules: columnflow.production.{categories,normalization,processes,veto}, columnflow.production.cms.{btag,electron,mc_weight,muon,pdf,pileup,scale,seeds}, __cf_module_name__.production.{weights,features,categories}
+categorization_modules: __cf_module_name__.categorization.example
+ml_modules: columnflow.ml, __cf_module_name__.ml.example
+inference_modules: columnflow.inference, __cf_module_name__.inference.example
+
+# namespace of all columnflow tasks
+cf_task_namespace: cf
+
+# default sandbox for main tasks with standard packages for columnar processing
+default_columnar_sandbox: bash::$CF_BASE/sandboxes/venv_columnar.sh
+
+# whether or not the ensure_proxy decorator should be skipped, even if used by task's run methods
+skip_ensure_proxy: False
+
+# some remote workflow parameter defaults
+htcondor_flavor: $CF_HTCONDOR_FLAVOR
+htcondor_share_software: False
+slurm_flavor: $CF_SLURM_FLAVOR
+slurm_partition: $CF_SLURM_PARTITION
+
+# ChunkedIOHandler defaults
+chunked_io_chunk_size: 100000
+chunked_io_pool_size: 2
+chunked_io_debug: False
+
+# csv list of task families that inherit from ChunkedReaderMixin and whose output arrays should be
+# checked (raising an exception) for non-finite values before saving them to disk
+check_finite_output: cf.CalibrateEvents, cf.SelectEvents, cf.ProduceColumns
+
+# csv list of task families that inherit from ChunkedReaderMixin and whose input columns should be
+# checked (raising an exception) for overlaps between fields when creating a merged input array
+check_overlapping_inputs: None
+
+# whether to log runtimes of array functions by default
+log_array_function_runtime: False
+
+
+[outputs]
+
+# list of all used file systems
+wlcg_file_systems: wlcg_fs_t2b_redirector, wlcg_fs, wlcg_fs_infn_redirector, wlcg_fs_global_redirector
+
+# list of file systems used by columnflow.tasks.external.GetDatasetLFNs.iter_nano_files to
+# look for the correct fs per nano input file (in that order)
+lfn_sources: wlcg_fs_t2b_redirector, wlcg_fs_infn_redirector, wlcg_fs_global_redirector
+
+shared_location: /pnfs/iihe/cms/store/user/$CF_CERN_USER/columnflow/data/__cf_module_name__
+
+# output locations per task family
+# for local targets : "local[, LOCAL_FS_NAME or STORE_PATH]"
+# for remote targets: "wlcg[, WLCG_FS_NAME]"
+# (when WLCG_FS_NAME is empty, the tasks' "default_wlcg_fs" attribute is used)
+# examples:
+# cf.CalibrateEvents: wlcg
+# cf.SelectEvents: local
+cf.GetDatasetLFNs: local, %(shared_location)s
+cf.CalibrateEvents: local, %(shared_location)s
+cf.CreatePileupWeights: local, %(shared_location)s
+cf.SelectEvents: local, %(shared_location)s
+cf.MergeSelectionStats: local, %(shared_location)s
+cf.MergeSelectionMasks: local, %(shared_location)s
+cf.ReduceEvents: local, %(shared_location)s
+cf.MergeReductionStats: local, %(shared_location)s
+cf.MergeReducedEvents: local, %(shared_location)s
+cf.ProduceColumns: local, %(shared_location)s
+cf.CreateHistograms: local, %(shared_location)s
+cf.MergeHistograms: local, %(shared_location)s
+
+
+
+[job]
+
+job_file_dir: $CF_JOB_BASE
+job_file_dir_cleanup: False
+
+# storage element (SE) and output directory on that SE for crab's internal output
+# (crab might not even move files there, but it is strictly required for crab's job submission)
+crab_storage_element: $CF_CRAB_STORAGE_ELEMENT
+crab_base_directory: $CF_CRAB_BASE_DIRECTORY
+
+# lcg setup file sourced in remote jobs to access gfal tools
+remote_lcg_setup: /cvmfs/grid.cern.ch/centos7-ui-200122/etc/profile.d/setup-c7-ui-python3-example.sh
+
+
+[local_fs]
+
+base: /
+
+
+[wlcg_fs_t2b_redirector]
+
+# set this to your desired location
+base: /pnfs/iihe/cms/ph/sc4
+use_cache: $CF_WLCG_USE_CACHE
+cache_root: $CF_WLCG_CACHE_ROOT
+cache_cleanup: $CF_WLCG_CACHE_CLEANUP
+cache_max_size: 15GB
+cache_global_lock: True
+cache_mtime_patience: -1
+
+
+[wlcg_fs]
+
+# set this to your desired location
+base: root://eosuser.cern.ch/eos/user/$CF_CERN_USER_FIRSTCHAR/$CF_CERN_USER/$CF_STORE_NAME
+create_file_dir: True
+use_cache: $CF_WLCG_USE_CACHE
+cache_root: $CF_WLCG_CACHE_ROOT
+cache_cleanup: $CF_WLCG_CACHE_CLEANUP
+cache_max_size: 50GB
+
+
+[wlcg_fs_infn_redirector]
+
+base: root://xrootd-cms.infn.it/
+use_cache: $CF_WLCG_USE_CACHE
+cache_root: $CF_WLCG_CACHE_ROOT
+cache_cleanup: $CF_WLCG_CACHE_CLEANUP
+cache_max_size: 15GB
+cache_global_lock: True
+cache_mtime_patience: -1
+
+
+[wlcg_fs_global_redirector]
+
+base: root://cms-xrd-global.cern.ch/
+use_cache: $CF_WLCG_USE_CACHE
+cache_root: $CF_WLCG_CACHE_ROOT
+cache_cleanup: $CF_WLCG_CACHE_CLEANUP
+cache_max_size: 15GB
+cache_global_lock: True
+cache_mtime_patience: -1
\ No newline at end of file
diff --git a/analysis_templates/ghent_template/sandboxes/example.sh b/analysis_templates/ghent_template/sandboxes/example.sh
new file mode 100644
index 000000000..ee4b0b84a
--- /dev/null
+++ b/analysis_templates/ghent_template/sandboxes/example.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+# Script that sets up a virtual env in $CF_VENV_PATH.
+# For more info on functionality and parameters, see the generic setup script _setup_venv.sh.
+
+action() {
+    local shell_is_zsh=$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )
+    local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
+    local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
+    # the venv is named after this file (without the .sh suffix), requirements are read from example.txt;
+    # set variables and source the generic venv setup
+    export CF_SANDBOX_FILE="${CF_SANDBOX_FILE:-${this_file}}"
+    export CF_VENV_NAME="$( basename "${this_file%.sh}" )"
+    export CF_VENV_REQUIREMENTS="${this_dir}/example.txt"
+
+    source "${CF_BASE}/sandboxes/_setup_venv.sh" "$@"
+}
+action "$@"
diff --git a/analysis_templates/ghent_template/sandboxes/example.txt b/analysis_templates/ghent_template/sandboxes/example.txt
new file mode 100644
index 000000000..368f0ab06
--- /dev/null
+++ b/analysis_templates/ghent_template/sandboxes/example.txt
@@ -0,0 +1,8 @@
+# version 1
+
+git+https://github.com/CoffeaTeam/coffea.git@b9356b9#egg=coffea
+awkward~=2.0
+dask-awkward~=2023.1
+uproot~=5.0
+tabulate~=0.9
+tensorflow~=2.11
diff --git a/analysis_templates/ghent_template/sandboxes/example_dev.sh b/analysis_templates/ghent_template/sandboxes/example_dev.sh
new file mode 100644
index 000000000..37ceb6556
--- /dev/null
+++ b/analysis_templates/ghent_template/sandboxes/example_dev.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+# Script that sets up a virtual env in $CF_VENV_PATH.
+# For more info on functionality and parameters, see the generic setup script _setup_venv.sh.
+
+action() {
+    local shell_is_zsh=$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )
+    local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
+    local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
+    # the venv is named after this file (without the .sh suffix); requirements are example.txt next to
+    # this file plus sandboxes/dev.txt from $CF_BASE; set variables and source the generic venv setup
+    export CF_SANDBOX_FILE="${CF_SANDBOX_FILE:-${this_file}}"
+    export CF_VENV_NAME="$( basename "${this_file%.sh}" )"
+    export CF_VENV_REQUIREMENTS="${this_dir}/example.txt,${CF_BASE}/sandboxes/dev.txt"
+
+    source "${CF_BASE}/sandboxes/_setup_venv.sh" "$@"
+}
+action "$@"
diff --git a/analysis_templates/ghent_template/setup.sh b/analysis_templates/ghent_template/setup.sh
new file mode 100644
index 000000000..379e8169b
--- /dev/null
+++ b/analysis_templates/ghent_template/setup.sh
@@ -0,0 +1,162 @@
+#!/usr/bin/env bash
+
+setup___cf_short_name_lc__() {
+ # Runs the project setup, leading to a collection of environment variables starting with either
+ # - "CF_", for controlling behavior implemented by columnflow, or
+ # - "__cf_short_name_uc___", for features provided by the analysis repository itself.
+ # Check the setup.sh in columnflow for documentation of the "CF_" variables. The purpose of all
+ # "__cf_short_name_uc___" variables is documented below.
+ #
+ # The setup also handles the installation of the software stack via virtual environments, and
+ # optionally an interactive setup where the user can configure certain variables.
+ #
+ #
+ # Arguments:
+ # 1. The name of the setup. "default" (which is itself the default when no name is set)
+ # triggers a setup with good defaults, avoiding all queries to the user and the writing of
+ # a custom setup file. See "interactive_setup()" for more info.
+ #
+ #
+ # Optionally preconfigured environment variables:
+ # None yet.
+ #
+ #
+ # Variables defined by the setup and potentially required throughout the analysis:
+ # __cf_short_name_uc___BASE
+ # The absolute analysis base directory. Used to infer file locations relative to it.
+ # __cf_short_name_uc___SETUP
+ # A flag that is set to 1 after the setup was successful.
+ #
+ #
+ # Return codes:
+ # 1 when the setup already ran in this shell, otherwise the exit code of the first failing
+ # helper that is guarded with '|| return "$?"' below.
+
+ # prevent repeated setups
+ if [ "${__cf_short_name_uc___SETUP}" = "1" ]; then
+ >&2 echo "the __cf_analysis_name__ analysis was already succesfully setup"
+ >&2 echo "re-running the setup requires a new shell"
+ return "1"
+ fi
+
+
+ #
+ # prepare local variables
+ #
+
+ # resolve the path of this file and its directory, both in bash and in zsh
+ local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
+ local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
+ local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
+ local orig="${PWD}"
+ local setup_name="${1:-default}"
+ local setup_is_default="false"
+ [ "${setup_name}" = "default" ] && setup_is_default="true"
+
+ # zsh options
+ if ${shell_is_zsh}; then
+ emulate -L bash
+ setopt globdots
+ fi
+
+
+ #
+ # global variables
+ # (__cf_short_name_uc__ = __cf_analysis_name__, CF = columnflow)
+ #
+
+ # start exporting variables
+ export __cf_short_name_uc___BASE="${this_dir}"
+ export CF_BASE="${this_dir}/modules/columnflow"
+ export CF_REPO_BASE="${__cf_short_name_uc___BASE}"
+ export CF_REPO_BASE_ALIAS="__cf_short_name_uc___BASE"
+ export CF_SETUP_NAME="${setup_name}"
+
+ # load cf setup helpers
+ # (CF_SKIP_SETUP is set for this command only; presumably it makes the columnflow setup.sh
+ # define its functions without running its own setup - confirm in that file)
+ CF_SKIP_SETUP="1" source "${CF_BASE}/setup.sh" "" || return "$?"
+
+ # interactive setup
+ if [ "${CF_REMOTE_ENV}" != "1" ]; then
+ cf_setup_interactive_body() {
+ # pre-export the CF_FLAVOR which will be cms
+ export CF_FLAVOR="cms"
+
+ # query common variables
+ cf_setup_interactive_common_variables
+
+ # query specific variables
+ # nothing yet ...
+ }
+ cf_setup_interactive "${CF_SETUP_NAME}" "${__cf_short_name_uc___BASE}/.setups/${CF_SETUP_NAME}.sh" || return "$?"
+ fi
+
+ # continue the fixed setup
+ # (values that are already set are kept, otherwise they default to subdirectories of
+ # CF_SOFTWARE_BASE)
+ export CF_CONDA_BASE="${CF_CONDA_BASE:-${CF_SOFTWARE_BASE}/conda}"
+ export CF_VENV_BASE="${CF_VENV_BASE:-${CF_SOFTWARE_BASE}/venvs}"
+ export CF_CMSSW_BASE="${CF_CMSSW_BASE:-${CF_SOFTWARE_BASE}/cmssw}"
+
+
+ #
+ # common variables
+ #
+
+ cf_setup_common_variables || return "$?"
+
+
+ #
+ # minimal local software setup
+ #
+
+ cf_setup_software_stack "${CF_SETUP_NAME}" || return "$?"
+
+ # amend paths that are not covered by the central cf setup
+ export PATH="${__cf_short_name_uc___BASE}/bin:${PATH}"
+ export PYTHONPATH="${__cf_short_name_uc___BASE}:${__cf_short_name_uc___BASE}/modules/cmsdb:${PYTHONPATH}"
+
+ # initialize submodules
+ # (only when the analysis base is a git checkout; every entry of the modules directory is passed
+ # to cf_init_submodule, and failures of that helper are not propagated)
+ if [ -e "${__cf_short_name_uc___BASE}/.git" ]; then
+ local m
+ for m in $( ls -1q "${__cf_short_name_uc___BASE}/modules" ); do
+ cf_init_submodule "${__cf_short_name_uc___BASE}" "modules/${m}"
+ done
+ fi
+
+
+ #
+ # git hooks
+ #
+
+ cf_setup_git_hooks || return "$?"
+
+
+ #
+ # law setup
+ #
+
+ export LAW_HOME="${LAW_HOME:-${__cf_short_name_uc___BASE}/.law}"
+ export LAW_CONFIG_FILE="${LAW_CONFIG_FILE:-${__cf_short_name_uc___BASE}/law.cfg}"
+
+ # the law steps are skipped silently when the law executable is not available
+ if which law &> /dev/null; then
+ # source law's bash completion script
+ source "$( law completion )" ""
+
+ # silently index
+ law index -q
+ fi
+
+ # finalize
+ export __cf_short_name_uc___SETUP="1"
+}
+
+main() {
+    # Runs the analysis setup with all given arguments and prints a colored status message.
+    # The exit code of the setup function is handed back to the caller unchanged.
+
+    # run the actual setup and remember a non-zero exit code
+    local code="0"
+    setup___cf_short_name_lc__ "$@" || code="$?"
+
+    # failure: report the code and propagate it
+    if [ "${code}" != "0" ]; then
+        cf_color red "setup failed with code ${code}"
+        return "${code}"
+    fi
+
+    # success
+    cf_color green "__cf_analysis_name__ analysis successfully setup"
+    return "0"
+}
+
+# entry point
+# (main is invoked unless __cf_short_name_uc___SKIP_SETUP is set to "1", in which case sourcing this
+# file only defines the functions above)
+if [ "${__cf_short_name_uc___SKIP_SETUP}" != "1" ]; then
+ main "$@"
+fi
diff --git a/analysis_templates/ghent_template/tests/__init__.py b/analysis_templates/ghent_template/tests/__init__.py
new file mode 100644
index 000000000..e97ffe8b0
--- /dev/null
+++ b/analysis_templates/ghent_template/tests/__init__.py
@@ -0,0 +1,18 @@
+# coding: utf-8
+# flake8: noqa
+
+"""
+Entry point for all tests.
+"""
+
+__all__ = []
+
+# adjust the path to import the package
+# (base is the parent of the directory that contains this file; it is appended to sys.path so that
+# the import of the analysis package below can be resolved)
+import os
+import sys
+base = os.path.normpath(os.path.join(os.path.abspath(__file__), "../.."))
+sys.path.append(base)
+import __cf_module_name__ # noqa
+
+# import all tests
+# ...
diff --git a/analysis_templates/ghent_template/tests/run_all b/analysis_templates/ghent_template/tests/run_all
new file mode 100755
index 000000000..b14cbaad6
--- /dev/null
+++ b/analysis_templates/ghent_template/tests/run_all
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+# Script that triggers all run_* scripts in this directory with default arguments. By default, the
+# process is terminated if a script returns with a non-zero exit code.
+#
+# Arguments:
+# 1. The mode. When "force", all scripts are executed independently of non-zero exit codes of
+# previous scripts.
+
+action() {
+    # Runs the run_linting script located next to this file and reports the outcome in color.
+    # Returns the exit code of a failing script right away, unless the first argument is "force",
+    # in which case all scripts are run and 1 is returned at the end if any of them failed.
+
+    local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
+    local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
+    local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
+
+    # the requested mode and the bookkeeping of exit codes
+    local run_mode="${1}"
+    local overall="0"
+    local status
+
+    # helper that prints the message (second argument) with the ANSI color code (first argument)
+    cecho() {
+        local color_code="${1}"
+        local text="${2}"
+        echo -e "\x1b[0;49;${color_code}m${text}\x1b[0m"
+    }
+
+    # linting
+    cecho 35 "check linting ..."
+    if bash "${this_dir}/run_linting"; then
+        cecho 32 "done"
+    else
+        status="$?"
+        >&2 cecho 31 "run_linting failed with exit code ${status}"
+        # stop here unless failures are to be tolerated
+        [ "${run_mode}" = "force" ] || return "${status}"
+        overall="1"
+    fi
+
+    return "${overall}"
+}
+action "$@"
diff --git a/analysis_templates/ghent_template/tests/run_linting b/analysis_templates/ghent_template/tests/run_linting
new file mode 100755
index 000000000..c6d174c95
--- /dev/null
+++ b/analysis_templates/ghent_template/tests/run_linting
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+# Script that runs linting checks on selected files.
+
+action() {
+ local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
+ local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
+ local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
+ local __cf_short_name_lc___dir="$( dirname "${this_dir}" )"
+
+ (
+ cd "${__cf_short_name_lc___dir}" && \
+ flake8 __cf_module_name__ tests
+ )
+}
+action "$@"
diff --git a/columnflow/calibration/cmsGhent/__init__.py b/columnflow/calibration/cmsGhent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/columnflow/calibration/cmsGhent/lepton_mva.py b/columnflow/calibration/cmsGhent/lepton_mva.py
new file mode 100644
index 000000000..98c325afe
--- /dev/null
+++ b/columnflow/calibration/cmsGhent/lepton_mva.py
@@ -0,0 +1,160 @@
+"""
+Code to add lepton MVA to NanoAOD
+"""
+
+from collections import OrderedDict
+
+from columnflow.calibration import Calibrator, calibrator
+from columnflow.production import Producer, producer
+from columnflow.util import maybe_import
+from columnflow.columnar_util import set_ak_column, InsertableDict
+from columnflow.columnar_util_Ghent import TetraVec
+from columnflow.tasks.external import BundleExternalFiles
+
+np = maybe_import("numpy")
+ak = maybe_import("awkward")
+coffea = maybe_import("coffea")
+maybe_import("coffea.nanoevents.methods.nanoaod")
+
+
+@producer(
+ uses={
+ f"{lep}.{p}"
+ for lep in ["Muon", "Electron"]
+ for p in ["pt", "eta", "miniPFRelIso_all", "miniPFRelIso_chg", "jetRelIso", "dxy", "dz", "jetIdx",
+ "jetNDauCharged", "jetPtRelv2", "pfRelIso03_all", "sip3d"]
+ } | {"Jet.btagDeepFlavB", "Electron.mvaFall17V2noIso", "Muon.segmentComp"},
+ produces={
+ f"{lep}.{p}"
+ for lep in ["Muon", "Electron"]
+ for p in ["abseta", "miniPFRelIso_neutral", "jetPtRatio", "jetBTagDeepFlavor", "log_absdxy", "log_absdz"]
+ },
+)
+def lepton_mva_inputs_producer(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
+ """
+ Collects all inputs to the TOP lepton MVA (v1) and makes the necessary transformations.
+
+ For both muons and electrons, the following columns are added to *events*:
+
+ - ``jetPtRatio``: ``1 / (jetRelIso + 1)``
+ - ``jetBTagDeepFlavor``: ``Jet.btagDeepFlavB`` of the jet referred to by ``jetIdx``, zero if
+ ``jetIdx`` is -1
+ - ``log_absdxy``, ``log_absdz``: logarithm of the absolute impact parameters
+ - ``miniPFRelIso_neutral``: ``miniPFRelIso_all - miniPFRelIso_chg``
+ - ``abseta``: absolute value of eta
+
+ :param events: Array containing the columns listed in *uses*
+ :return: The events array with the additional columns
+ """
+ for lepton_name in ["Muon", "Electron"]:
+ lepton = events[lepton_name]
+ matched_jet = lepton.jetIdx
+ is_matched = matched_jet != -1
+
+ # replace jetRelIso by the equivalent used in the MVA
+ # if no matched jet, jetRelIso == pfRelIso04_all in NanoAOD, but MVA assumes then zero
+ # NOTE(review): the expression below is applied to all leptons and does not use is_matched,
+ # so the unmatched case described above is not treated separately here - confirm intent
+ events = set_ak_column(events, f"{lepton_name}.jetPtRatio", 1. / (lepton.jetRelIso + 1.))
+
+ # matched deepJet score of closest jet if any (zero otherwise)
+ # (padding guarantees at least one entry per event so that indexing with jetIdx is possible;
+ # values picked up for unmatched leptons are replaced by zero in the ak.where below)
+ btag_values = ak.pad_none(events.Jet.btagDeepFlavB, target=1)[matched_jet]
+ events = set_ak_column(events, f"{lepton_name}.jetBTagDeepFlavor", ak.where(is_matched, btag_values, 0.))
+
+ # impact parameters in log
+ for impact in ["dxy", "dz"]:
+ events = set_ak_column(events, f"{lepton_name}.log_abs" + impact, np.log(np.abs(lepton[impact])))
+
+ # Relative mini-isolation with neutral PF objects
+ events = set_ak_column(events, f"{lepton_name}.miniPFRelIso_neutral", lepton.miniPFRelIso_all - lepton.miniPFRelIso_chg)
+
+ # absolute eta
+ events = set_ak_column(events, f"{lepton_name}.abseta", np.abs(lepton.eta))
+
+ return events
+
+
+# names of the lepton columns that are fed to both the electron and the muon MVA
+_shared_mva_inputs = [
+ "pt",
+ "eta",
+ "jetNDauCharged",
+ "miniPFRelIso_chg",
+ "miniPFRelIso_neutral",
+ "jetPtRelv2",
+ "jetPtRatio",
+ "pfRelIso03_all",
+ "jetBTagDeepFlavor",
+ "sip3d",
+ "log_absdxy",
+ "log_absdz",
+]
+
+# ordered list of input columns per lepton type; lepton_mva_producer stacks the columns in exactly
+# this order to build the feature matrix passed to the predictor
+# NOTE: the "Lepton" entry refers to the very same list object as _shared_mva_inputs (no copy)
+lepton_mva_inputs = {
+ "Electron": [*_shared_mva_inputs, "mvaFall17V2noIso"], # add "lost hits" for version 2
+ "Muon": [*_shared_mva_inputs, "segmentComp"],
+ "Lepton": _shared_mva_inputs
+}
+
+
+@calibrator(
+ uses={lepton_mva_inputs_producer},
+ produces={"Electron.mvaTOP", "Muon.mvaTOP"},
+ # NOTE(review): the sandbox path refers to $SINGLETOP_BASE, whereas the docstring below lists
+ # "$CF_BASE/sandboxes/venv_lepton_mva.sh" - confirm which location is intended
+ sandbox="bash::$SINGLETOP_BASE/sandboxes/venv_lepton_mva.sh",
+)
+def lepton_mva_producer(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array:
+ """
+ Produces the TOP lepton MVA (v1) scores and stores them as ``Electron.mvaTOP`` and
+ ``Muon.mvaTOP``.
+ Requires an external file in the config under ``lepton_mva.weights``:
+
+ .. code-block:: python
+
+ cfg.x.external_files = DotDict.wrap({
+ "lepton_mva": {
+ "weights": {
+ "Muon": "YOURDIRECTORY/mu_TOPUL18_XGB.weights.bin",
+ "Electron": "YOURDIRECTORY/weights/el_TOPUL18_XGB.weights.bin",
+ },
+ },
+ })
+
+ Requires adding the environment venv_lepton_mva, which includes xgboost, to the analysis or config. E.g.
+
+ .. code-block:: python
+
+ analysis_inst.x.bash_sandboxes = [
+ "$CF_BASE/sandboxes/cf.sh",
+ "$CF_BASE/sandboxes/venv_lepton_mva.sh",
+ ]
+
+ """
+ # compute the transformed input columns first
+ events = self[lepton_mva_inputs_producer](events)
+ for lepton in ["Muon", "Electron"]:
+ # one array per input feature, in the order defined in lepton_mva_inputs
+ features = [events[lepton][p] for p in lepton_mva_inputs[lepton]]
+ # set None values (e.g. when there is no matched jet) to zero
+ features = ak.fill_none(features, 0.)
+ # flatten into a numpy array of shape (ninstances, nfeatures)
+ # (the number of leptons per event is kept to restore the event structure later)
+ counts = ak.num(features[0])
+ features = np.transpose(np.array(ak.flatten(features, axis=2)))
+ # make c-contiguous (rows are stored as contiguous blocks of memory.)
+ features = np.ascontiguousarray(features)
+
+ # NOTE(review): np.any is False for an empty feature matrix, but also for a non-empty one
+ # that contains only zeros; presumably this is meant as a guard against empty input - confirm
+ if np.any(features):
+ # call xgboost predictor
+ scores = self.mva[lepton].inplace_predict(features)
+ # unflatten into an awkward array
+ scores = ak.unflatten(scores, counts)
+ else:
+ # fall back to scores of zero with the layout of the first input column
+ scores = ak.zeros_like(events[lepton][lepton_mva_inputs[lepton][0]], dtype=np.float32)
+ # set the scores as an additional field of the current lepton collection
+ events = set_ak_column(events, f"{lepton}.mvaTOP", scores)
+
+ return events
+
+
+@lepton_mva_producer.requires
+def lepton_mva_producer_requires(self: Calibrator, reqs: dict) -> None:
+ if "external_files" in reqs:
+ return
+ reqs["external_files"] = BundleExternalFiles.req(self.task)
+
+
+@lepton_mva_producer.setup
+def lepton_mva_producer_setup(
+ self: Calibrator,
+ reqs: dict,
+ inputs: dict,
+ reader_targets: InsertableDict,
+) -> None:
+ bundle = reqs["external_files"]
+
+ # create the xgboost predictor
+ import xgboost
+
+ self.mva = {}
+
+ for lepton in ["Electron", "Muon"]:
+ self.mva[lepton] = xgboost.Booster()
+ self.mva[lepton].load_model(bundle.files.lepton_mva["weights"][lepton].path)
diff --git a/columnflow/categorization/cmsGhent/__init__.py b/columnflow/categorization/cmsGhent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/columnflow/columnar_util_Ghent.py b/columnflow/columnar_util_Ghent.py
new file mode 100644
index 000000000..54ccbfe2b
--- /dev/null
+++ b/columnflow/columnar_util_Ghent.py
@@ -0,0 +1,37 @@
+# coding: utf-8
+
+"""
+Helpers and utilities for working with columnar libraries (Ghent cms group)
+"""
+
+from __future__ import annotations
+
+__all__ = [
+ "TetraVec", "safe_concatenate",
+]
+
+from columnflow.util import maybe_import
+
+ak = maybe_import("awkward")
+coffea = maybe_import("coffea")
+
+
+def TetraVec(arr: ak.Array) -> ak.Array:
+ """
+ create a Lorentz for fector from an awkward array with pt, eta, phi, and mass fields
+ """
+ for field in ["pt", "eta", "phi", "mass"]:
+ assert hasattr(arr, field), f"Provided array is missing {field} field"
+ TetraVec = ak.zip({"pt": arr.pt, "eta": arr.eta, "phi": arr.phi, "mass": arr.mass},
+ with_name="PtEtaPhiMLorentzVector",
+ behavior=coffea.nanoevents.methods.vector.behavior)
+ return TetraVec
+
+
+def safe_concatenate(arrays, *args, **kwargs):
+ n = len(arrays)
+ if n > 2 ** 7:
+ c1 = safe_concatenate(arrays[:n // 2], *args, **kwargs)
+ c2 = safe_concatenate(arrays[n // 2:], *args, **kwargs)
+ return ak.concatenate([c1, c2], *args, **kwargs)
+ return ak.concatenate(arrays, *args, **kwargs)
diff --git a/columnflow/inference/cmsGhent/__init__.py b/columnflow/inference/cmsGhent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/columnflow/ml/cmsGhent/__init__.py b/columnflow/ml/cmsGhent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/columnflow/plotting/cmsGhent/__init__.py b/columnflow/plotting/cmsGhent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/columnflow/plotting/cmsGhent/colors.py b/columnflow/plotting/cmsGhent/colors.py
new file mode 100644
index 000000000..5f486bb5a
--- /dev/null
+++ b/columnflow/plotting/cmsGhent/colors.py
@@ -0,0 +1,114 @@
+class DefaultColors:
+ def __init__(self):
+ self.grey = "#94a4a2"
+ self.grey2 = "#717581"
+ self.grey3 = "#5D5F66"
+ self.grey4 = "#44464A"
+ self.grey5 = "#3D3E3F"
+
+ def pastel(self):
+ return PastelColors()
+
+ def rainbow(self):
+ return RainbowColors()
+
+ def cat_six(self):
+ return CATColorsSix()
+
+ def cat_ten(self):
+ return CATColorsTen()
+
+ def __getitem__(self, i):
+ return self.colors[i]
+
+
+class CATColorsSix(DefaultColors):
+ def __init__(self):
+ super().__init__()
+ self.red = "#e42536"
+ self.orange = "#f89c20"
+ self.blue = "#5790fc"
+ self.purple = "#7a21dd"
+ self.violet = "#964a8b"
+ self.grey = "#9c9ca1"
+ self.colors = [
+ self.red,
+ self.orange,
+ self.blue,
+ self.purple,
+ self.violet,
+ self.grey,
+ ]
+
+
+class CATColorsTen(DefaultColors):
+ def __init__(self):
+ super().__init__()
+ self.blue = "#3f90da"
+ self.blue_light = "#92dadd"
+ self.orange = "#ffa90e"
+ self.orange_dark = "#e76300"
+ self.red = "#bd1f01"
+ self.purple = "#832db6"
+ self.brown = "#a96b59"
+ self.ochre = "#b9ac70"
+ self.grey = "#94a4a2"
+ self.grey2 = "#717581"
+ self.colors = [
+ self.blue,
+ self.blue_light,
+ self.orange,
+ self.orange_dark,
+ self.red,
+ self.purple,
+ self.brown,
+ self.ochre,
+ self.grey,
+ self.grey2,
+ ]
+
+
+class RainbowColors(DefaultColors):
+ def __init__(self):
+ super().__init__()
+ self.purple = "#d23be7"
+ self.blue = "#4355db"
+ self.blue_light = "#34bbe6"
+ self.green = "#49da9a"
+ self.lime = "#a3e048"
+ self.yellow = "#f7d038"
+ self.orange = "#eb7532"
+ self.red = "#e6261f"
+ self.colors = [
+ self.purple,
+ self.blue,
+ self.blue_light,
+ self.green,
+ self.lime,
+ self.yellow,
+ self.orange,
+ self.red,
+ ]
+
+
+class PastelColors(DefaultColors):
+ def __init__(self):
+ super().__init__()
+ self.yellow = "#ffa600"
+ self.orange = "#ff843c"
+ self.orange_dark = "#ff6562"
+ self.red = "#fd5385"
+ self.violet_light = "#da52a2"
+ self.violet = "#ab59b5"
+ self.purple = "#6f5fba"
+ self.blue = "#1761b0"
+ self.colors = [
+ self.yellow,
+ self.orange,
+ self.orange_dark,
+ self.red,
+ self.violet_light,
+ self.violet,
+ self.purple,
+ self.blue,
+ ]
diff --git a/columnflow/plotting/plot_util.py b/columnflow/plotting/plot_util.py
index afa238bea..68c43ea2c 100644
--- a/columnflow/plotting/plot_util.py
+++ b/columnflow/plotting/plot_util.py
@@ -14,7 +14,7 @@
import order as od
-from columnflow.util import maybe_import, try_int
+from columnflow.util import maybe_import, try_int, try_complex
math = maybe_import("math")
hist = maybe_import("hist")
@@ -122,6 +122,17 @@ def apply_variable_settings(
h = h[{var_inst.name: hist.rebin(rebin_factor)}]
hists[proc_inst] = h
+ slices = getattr(var_inst, "slice", None) or var_inst.x("slice", None)
+ if (
+ slices and isinstance(slices, Iterable) and len(slices) >= 2 and
+ try_complex(slices[0]) and try_complex(slices[1])
+ ):
+ slice_0 = int(slices[0]) if try_int(slices[0]) else complex(slices[0])
+ slice_1 = int(slices[1]) if try_int(slices[1]) else complex(slices[1])
+ for proc_inst, h in list(hists.items()):
+ h = h[{var_inst.name: slice(slice_0, slice_1)}]
+ hists[proc_inst] = h
+
return hists
@@ -202,9 +213,16 @@ def prepare_style_config(
# disable minor ticks based on variable_inst
if variable_inst.discrete_x:
- # TODO: find sth better than plain bin edges or possibly memory intense range(*xlim)
- style_config["ax_cfg"]["xticks"] = variable_inst.bin_edges
+ # TODO: options for very large ranges, or non-uniform discrete x
+ tx = range(int(xlim[0]), int(xlim[1]+1))
+ style_config["ax_cfg"]["xticks"] = tx
style_config["ax_cfg"]["minorxticks"] = []
+
+ # add custom bin labels if specified and same amount of x ticks
+ if x_labels := variable_inst.x_labels:
+ if len(x_labels) == len(tx):
+ style_config["ax_cfg"]["xticklabels"] = x_labels
+
if variable_inst.discrete_y:
style_config["ax_cfg"]["minoryticks"] = []
diff --git a/columnflow/production/categories.py b/columnflow/production/categories.py
index 415b5dbd7..afd71a448 100644
--- a/columnflow/production/categories.py
+++ b/columnflow/production/categories.py
@@ -14,6 +14,7 @@
from columnflow.production import Producer, producer
from columnflow.util import maybe_import
from columnflow.columnar_util import set_ak_column
+from columnflow.columnar_util_Ghent import safe_concatenate
np = maybe_import("numpy")
ak = maybe_import("awkward")
@@ -48,7 +49,7 @@ def category_ids(
category_ids.append(ak.singletons(ak.nan_to_none(ids)))
# combine
- category_ids = ak.concatenate(category_ids, axis=1)
+ category_ids = safe_concatenate(category_ids, axis=1)
# save, optionally on a target events array
if target_events is None:
diff --git a/columnflow/production/cms/muon.py b/columnflow/production/cms/muon.py
index 79436a5b6..c702b4339 100644
--- a/columnflow/production/cms/muon.py
+++ b/columnflow/production/cms/muon.py
@@ -18,15 +18,15 @@
uses={
"Muon.pt", "Muon.eta",
},
- produces={
- "muon_weight", "muon_weight_up", "muon_weight_down",
- },
+ # produces in the init
# only run on mc
mc_only=True,
# function to determine the correction file
get_muon_file=(lambda self, external_files: external_files.muon_sf),
# function to determine the muon weight config
get_muon_config=(lambda self: self.config_inst.x.muon_sf_names),
+ weight_name="muon_weight",
+ supported_versions=(1, 2),
)
def muon_weights(
self: Producer,
@@ -83,7 +83,6 @@ def muon_weights(
"ValType": syst, # syst key in 2017
}
inputs = [variable_map_syst[inp.name] for inp in self.muon_sf_corrector.inputs]
-
sf_flat = self.muon_sf_corrector(*inputs)
# add the correct layout to it
@@ -93,7 +92,7 @@ def muon_weights(
weight = ak.prod(sf, axis=1, mask_identity=False)
# store it
- events = set_ak_column(events, f"muon_weight{postfix}", weight, value_type=np.float32)
+ events = set_ak_column(events, f"{self.weight_name}{postfix}", weight, value_type=np.float32)
return events
@@ -126,5 +125,11 @@ def muon_weights_setup(
self.muon_sf_corrector = correction_set[corrector_name]
# check versions
- if self.muon_sf_corrector.version not in (1,):
+ if self.supported_versions and self.muon_sf_corrector.version not in self.supported_versions:
raise Exception(f"unsuppprted muon sf corrector version {self.muon_sf_corrector.version}")
+
+
+@muon_weights.init
+def muon_weights_init(self: Producer, **kwargs) -> None:
+ weight_name = self.weight_name
+ self.produces |= {weight_name, f"{weight_name}_up", f"{weight_name}_down"}
diff --git a/columnflow/production/cmsGhent/__init__.py b/columnflow/production/cmsGhent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/columnflow/production/cmsGhent/gen_features.py b/columnflow/production/cmsGhent/gen_features.py
new file mode 100644
index 000000000..b89b660f4
--- /dev/null
+++ b/columnflow/production/cmsGhent/gen_features.py
@@ -0,0 +1,117 @@
+from collections import defaultdict
+from typing import Tuple
+
+import law
+
+from columnflow.util import maybe_import, four_vec
+from columnflow.columnar_util import set_ak_column
+from columnflow.production import Producer, producer
+from columnflow.columnar_util_Ghent import TetraVec
+
+np = maybe_import("numpy")
+ak = maybe_import("awkward")
+coffea = maybe_import("coffea")
+
+
+def _geometric_matching(particles1: ak.Array, particles2: ak.Array) -> (ak.Array, ak.Array):
+ """
+ Returns two awkward arrays.
+ First contains that for each particle in **particles** the closest particle in the same event in **particles2**.
+ Second tells you whether the found closest particle is contained within a cone of 0.2.
+ """
+ particles1, particles2 = ak.unzip(ak.cartesian([particles1, particles2], axis=1, nested=True))
+ dr = particles1.delta_r(particles2)
+ drmin_idx = ak.argmin(dr, axis=-1, keepdims=True)
+ drmin = ak.flatten(dr[drmin_idx], axis=2)
+ closest_match = ak.flatten(particles2[drmin_idx], axis=2)
+ return closest_match, ak.fill_none(drmin < 0.2, False)
+
+
+# map of the status flag name to the position of the corresponding bit in GenPart.statusFlags
+# (a flag is tested in lepton_gen_features via ``statusFlags & (1 << bit) != 0``)
+_statusmap = ({
+ "isPrompt": 0,
+ "isDecayedLeptonHadron": 1,
+ "isTauDecayProduct": 2,
+ "isPromptTauDecayProduct": 3,
+ "isDirectTauDecayProduct": 4,
+ "isDirectPromptTauDecayProduct": 5,
+ "isDirectHadronDecayProduct": 6,
+ "isHardProcess": 7,
+ "fromHardProcess": 8,
+ "isHardProcessTauDecayProduct": 9,
+ "isDirectHardProcessTauDecayProduct": 10,
+ "fromHardProcessBeforeFSR": 11,
+ "isFirstCopy": 12,
+ "isLastCopy": 13,
+ "isLastCopyBeforeFSR": 14,
+ })
+
+# status flags that mark a genparticle as prompt; lepton_gen_features treats a matched particle as
+# prompt as soon as at least one of these flags is set
+_prompt_status = ["isPrompt", "isDirectPromptTauDecayProduct", "isHardProcess",
+ "fromHardProcess", "fromHardProcessBeforeFSR"]
+
+
+# NOTE(review): ("GenPart") below is a plain string rather than a tuple; presumably four_vec
+# accepts a single collection name - confirm
+@producer(
+ uses=four_vec(
+ ("Electron", "Muon"),
+ ("pdgId", "genPartIdx")) |
+ four_vec(
+ ("GenPart"),
+ ("pdgId", "status", "statusFlags")
+ ),
+ produces=four_vec(
+ {"Electron", "Muon"},
+ {"isPrompt", "matchPdgId", "isChargeFlip"}
+ ),
+ mc_only=True,
+ exposed=False,
+)
+def lepton_gen_features(
+ self: Producer,
+ events: ak.Array,
+ **kwargs,
+) -> ak.Array:
+ """
+ Matches electrons and muons to generator particles and stores three boolean columns per lepton
+ collection:
+
+ - ``isPrompt``: the matched gen particle has at least one of the flags in ``_prompt_status``
+ - ``matchPdgId``: a valid match exists and its pdgId equals that of the lepton
+ - ``isChargeFlip``: a valid match exists and its pdgId is the negative of that of the lepton
+
+ The matched gen particle is chosen with the following priority:
+
+ 1. the particle referred to by ``genPartIdx`` if it has the same pdgId as the lepton,
+ 2. the closest stable (status 1) gen lepton of the same flavor within delta R < 0.2,
+ 3. the closest stable gen photon within delta R < 0.2.
+
+ Missing values in the three columns are filled with False.
+ """
+
+ genpart = events.GenPart
+
+ for name, abs_pdgId in (("Electron", 11), ("Muon", 13)):
+
+ lepton = events[name]
+
+ # first check if already has a matched gen particle (include charge matching)
+ is_nanoAOD_matched = (lepton.genPartIdx >= 0)
+ is_nanoAOD_charge_matched = is_nanoAOD_matched & (lepton.pdgId == genpart.pdgId[lepton.genPartIdx])
+ matched_genpart = genpart[lepton.genPartIdx]
+
+ # if this fails apply geometric matching to stable leptons and photons
+
+ # select stable gen particles
+ stable_genpart = genpart[genpart.status == 1]
+
+ # first look for closest matching generator lepton within cone of 0.2
+ gen_abs_pdgId = abs(stable_genpart.pdgId)
+ geom_match_lepton, lepton_within_cone = _geometric_matching(lepton, stable_genpart[gen_abs_pdgId == abs_pdgId])
+
+ # if not within cone of 0.2, allow for a photon match
+ geom_match_photon, photon_within_cone = _geometric_matching(lepton, stable_genpart[gen_abs_pdgId == 22])
+
+ # finally apply hierarchy to determine matched gen particle
+ # (later assignments take precedence; the zero-filled array is the placeholder for leptons
+ # without any match)
+ match = ak.Array(ak.zeros_like(geom_match_photon))
+ match = ak.where(photon_within_cone, geom_match_photon, match)
+ match = ak.where(lepton_within_cone, geom_match_lepton, match)
+ match = ak.where(is_nanoAOD_charge_matched, matched_genpart, match)
+
+ # check for the matched gen particle if at least one of the prompt status flags is set
+ # (the flags are combined with a logical OR)
+ match_isPrompt = False
+ for status in _prompt_status:
+ match_isPrompt = match_isPrompt | (match.statusFlags & (1 << _statusmap[status]) != 0)
+
+ # NOTE(review): the validity uses is_nanoAOD_matched, whereas the hierarchy above uses
+ # is_nanoAOD_charge_matched; a lepton whose genPartIdx points to a particle with a different
+ # pdgId and that has no geometric match is compared to the placeholder - confirm intent
+ valid_match = is_nanoAOD_matched | lepton_within_cone | photon_within_cone
+ match_pdgId = (match.pdgId == lepton.pdgId) & valid_match
+ is_chargeflip = (match.pdgId == -lepton.pdgId) & valid_match
+
+ events = set_ak_column(events, f"{name}.isPrompt", ak.fill_none(match_isPrompt, False, axis=-1))
+ events = set_ak_column(events, f"{name}.matchPdgId", ak.fill_none(match_pdgId, False, axis=-1))
+ events = set_ak_column(events, f"{name}.isChargeFlip", ak.fill_none(is_chargeflip, False, axis=-1))
+
+ return events
diff --git a/columnflow/production/veto.py b/columnflow/production/veto.py
new file mode 100644
index 000000000..9f7e6a2f7
--- /dev/null
+++ b/columnflow/production/veto.py
@@ -0,0 +1,77 @@
+from collections import defaultdict
+
+from columnflow.production import Producer, producer
+from columnflow.util import maybe_import, InsertableDict
+from columnflow.columnar_util import set_ak_column
+from law import LocalFileTarget
+
+ak = maybe_import("awkward")
+np = maybe_import("numpy")
+
+
+@producer(
+ uses={"event", "run", "luminosityBlock"},
+ produces={"veto"},
+ exposed=False,
+ get_veto_file=(lambda self, external_files: external_files.veto),
+)
+def veto_events(
+ self: Producer,
+ events: ak.Array,
+ file: LocalFileTarget = None,
+ **kwargs,
+) -> ak.Array:
+ """
+ Produces a mask vetoing certain events from being processed. Outputs a SelectionResult
+ with attributes *veto* (containing a mask selecting the vetoed events) and with the *event*
+ attribute initialized with a mask selecting non-vetoed events. If *file* is provided, it checks only
+ events contained within this file, or events not designated to any file.
+
+ The events that are vetoed need to be specified from ``config_inst``,
+ which must contain the keyword ``veto`` in the auxiliary information. This can look
+ like this:
+
+ .. code-block:: python
+
+ # cfg is the current config instance
+ cfg.x.veto = config.x.veto = {
+ "dy_lep_m10to50_amcatnlo" : [
+ {
+ "event": 33098036,
+ "luminosityBlock": 20170,
+ "run": 1,
+ ** optionally **
+ "file": "/store/mc/RunIISummer20UL18NanoAODv9/DYJetsToLL_M-10to50_TuneCP5_13TeV-amcatnloFXFX-pythia8/NANOAODSIM/106X_upgrade2018_realistic_v16_L1v1-v1/50000/296CA60E-0122-2F4F-8B04-17DCF5E3E062.root" # noqa
+ }
+ ]
+ }
+
+ """
+
+ veto = np.full_like(events.event, False, dtype=bool)
+ for veto_event in self.veto_list:
+ if file is None or "file" not in veto_event or file.path == veto_event["file"]:
+ veto = veto | (
+ (events.event == veto_event['event']) &
+ (events.run == veto_event['run']) &
+ (events.luminosityBlock == veto_event['luminosityBlock'])
+ )
+
+ events = set_ak_column(events, "veto", veto)
+
+ return events
+
+
+@veto_events.setup
+def veto_events_setup(
+ self: Producer,
+ reqs: dict,
+ inputs: dict,
+ reader_targets: InsertableDict,
+) -> None:
+ """
+ Loads the event veto file from the external files bundle and saves them in the
+ py:attr:`veto_list` attribute for simpler access in the actual callable.
+ """
+ veto_dict = self.config_inst.aux.get("veto", {})
+ self.veto_list = veto_dict.get(self.dataset_inst.name, [])
diff --git a/columnflow/selection/cmsGhent/__init__.py b/columnflow/selection/cmsGhent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/columnflow/selection/cmsGhent/lepton_mva_cuts.py b/columnflow/selection/cmsGhent/lepton_mva_cuts.py
new file mode 100644
index 000000000..f85ca42a7
--- /dev/null
+++ b/columnflow/selection/cmsGhent/lepton_mva_cuts.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+
+"""
+Selection modules for object selection of Muon, Electron, and Jet.
+"""
+
+from collections import defaultdict
+from typing import Tuple, Literal, Dict
+
+import law
+
+from columnflow.util import maybe_import, four_vec
+from columnflow.columnar_util import set_ak_column, optional_column
+from columnflow.production.util import attach_coffea_behavior
+from columnflow.selection import Selector, SelectionResult, selector
+from columnflow.selection.util import masked_sorted_indices
+
+ak = maybe_import("awkward")
+
+
+@selector(
+ uses=(
+ four_vec({"Electron", "Muon"}, {"dxy", "dz", "sip3d", "miniPFRelIso_all"}) |
+ {"Electron.lostHits", "Electron.deltaEtaSC", "Muon.mediumId"} |
+ optional_column("Electron.mvaTOP", "Muon.mvaTOP")
+ ),
+)
+def lepton_mva_object(
+ self: Selector,
+ events: ak.Array,
+ working_point: 'Dict[Listeral["Muon", "Electron"], str] | str'="veto",
+ **kwargs,
+) -> Tuple[ak.Array, SelectionResult]:
+ """
+ The following cuts are the cuts that are required to be able to use the lepton MVA. Leptons that are
+ passing these cuts are referred to as "veto" leptons.
+ No additional cuts should be applied for the available scale factors to apply, except on p_T and eta.
+
+ :param events: Array containing events in the NanoAOD format
+ :param working_point: name of the working_point or dict mapping leptons to working points to apply to the muons
+ and electrons outputted in the SelectionResult
+ :return: Tuple containing the events array and a :py:class:`~columnflow.selection.SelectionResult`
+ with selected Muon and Electron objects passing **working_point**. The event array has extra Muon and Electron
+ boolean fields for the veto definition, as well as the TOP mva working points if the mvaTOP field is present in the
+ event.Muon and event.Electron fields
+
+ """
+ if isinstance(working_point, str):
+ working_point = {l: working_point for l in ["Muon", "Electron"]}
+ if set(working_point.values()) != {"veto"}:
+ assert working_point in self.config_inst.x.top_mva_wps
+ assert "mvaTOP" in events.Electron.fields
+ assert "mvaTOP" in events.Muon.fields
+
+ # conditions differing for muons and leptons
+ ele, mu = events.Electron, events.Muon
+ ele_absetaSC = abs(ele.eta + ele.deltaEtaSC)
+ masks = {
+ "Electron": (abs(ele.eta) < 2.5) & (ele.lostHits < 2) & ((ele_absetaSC > 1.5560) | (ele_absetaSC < 1.4442)),
+ "Muon": (abs(events.Muon.eta) < 2.4) & events.Muon.mediumId,
+ }
+
+ # conditions shared for muons and leptons
+ for lepton_name in masks:
+ lepton = events[lepton_name]
+ veto_mask = masks[lepton_name] & (
+ (lepton.pt > 10) &
+ (lepton.miniPFRelIso_all < 0.4) &
+ (lepton.sip3d < 8) &
+ (lepton.dz < 0.1) &
+ (lepton.dxy < 0.05)
+ )
+ events = set_ak_column(events, f"{lepton_name}.veto", veto_mask)
+ if "mvaTOP" in lepton.fields:
+ wps = self.config_inst.x.top_mva_wps
+ for wp in wps:
+ events = set_ak_column(events, f"{lepton_name}.{wp}",
+ events[lepton_name]["veto"] &
+ (lepton.mvaTOP > wps[wp])
+ )
+ return events, SelectionResult(
+ steps={},
+ objects={
+ lep:
+ {lep: masked_sorted_indices(events[lep][working_point[lep]], events[lep].pt)}
+ for lep in ["Muon", "Electron"]
+ },
+ )
diff --git a/columnflow/selection/util.py b/columnflow/selection/util.py
index 69618caed..c8123e1fb 100644
--- a/columnflow/selection/util.py
+++ b/columnflow/selection/util.py
@@ -80,3 +80,11 @@ def create_collections_from_masks(
events = set_ak_column(events, dst_name, dst_collection)
return events
+
+
+def masked_sorted_indices(mask: ak.Array, sort_var: ak.Array, ascending: bool = False) -> ak.Array:
+    """
+    Return the indices of the entries selected by *mask*, ordered by *sort_var*.
+
+    :param mask: Boolean array marking the entries to keep.
+    :param sort_var: Array whose values define the ordering; it is sorted along its last axis.
+    :param ascending: Sort in ascending order when *True*, in descending order otherwise.
+    :return: The sorting indices of *sort_var*, restricted to positions where *mask* is true.
+    """
+    # positions of all entries in sorted order, still including the masked-out ones
+    sorted_positions = ak.argsort(sort_var, axis=-1, ascending=ascending)
+
+    # bring the mask into the same order, then drop the positions that fail it
+    keep = mask[sorted_positions]
+    return sorted_positions[keep]
diff --git a/columnflow/tasks/cmsGhent/__init__.py b/columnflow/tasks/cmsGhent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/columnflow/tasks/cmsGhent/config.py b/columnflow/tasks/cmsGhent/config.py
new file mode 100644
index 000000000..d336db636
--- /dev/null
+++ b/columnflow/tasks/cmsGhent/config.py
@@ -0,0 +1,46 @@
+import law
+import luigi
+from columnflow.tasks.framework.base import ConfigTask
+from columnflow.tasks.framework.mixins import DatasetsProcessesMixin
+
+import json
+
+
+class ReadDataSets(DatasetsProcessesMixin, ConfigTask):
+    """
+    Task that writes a json file mapping each requested process to the dataset keys of the
+    requested datasets, taken either from the nominal dataset info or from all other
+    (shifted) infos.
+    """
+
+    shifts = luigi.BoolParameter(
+        default=False,
+        significant=False,
+        description="when True, print the shifted datasets, not the nominal",
+    )
+
+    def output(self) -> law.target.file.FileSystemFileTarget:
+        """
+        Return the json target, named "shifts.json" or "nominal.json" depending on *shifts*.
+        """
+        basename = "shifts" if self.shifts else "nominal"
+        return self.target(basename + ".json")
+
+    def complete(self):
+        """
+        Report the task as complete once its output target exists.
+        """
+        target = self.output()
+        return target.exists()
+
+    def run(self):
+        """
+        Collect the dataset keys per requested process and dump the mapping to the output.
+        """
+        keys_per_process = {name: [] for name in self.processes}
+
+        for dataset_name in self.datasets:
+            dataset_inst = self.config_inst.get_dataset(dataset_name)
+
+            # only the first process attached to the dataset is used for the matching
+            first_process = list(dataset_inst.processes)[0]
+
+            # keys of a dataset matching none of the requested processes end up in this
+            # throw-away list; when several requested processes match, the last one wins
+            collected = []
+            for name in self.processes:
+                process_inst = self.config_inst.get_process(name)
+                if process_inst.has_process(first_process) or process_inst == first_process:
+                    collected = keys_per_process[name]
+
+            # split the dataset info into the nominal entry and the remaining ones
+            info: dict = dataset_inst.info.copy()
+            nominal_info = info.pop('nominal')
+            selected_infos = info.values() if self.shifts else [nominal_info]
+            for dataset_info in selected_infos:
+                collected.extend(dataset_info.keys)
+
+        self.output().dump(keys_per_process, indent=2)
diff --git a/columnflow/tasks/cmsGhent/cutflow.py b/columnflow/tasks/cmsGhent/cutflow.py
new file mode 100644
index 000000000..08c0edf79
--- /dev/null
+++ b/columnflow/tasks/cmsGhent/cutflow.py
@@ -0,0 +1,265 @@
+
+from collections import defaultdict
+from scinum import Number
+
+
+from ..cutflow import *
+from columnflow.tasks.framework.decorators import view_output_plots
+from columnflow.tasks.framework.mixins import (
+ CalibratorsMixin, SelectorStepsMixin, CategoriesMixin, DatasetsProcessesMixin
+)
+
+import luigi
+import law
+from columnflow.util import maybe_import, DotDict, dev_sandbox, try_int
+
+np = maybe_import("numpy")
+
+
+class CreateCutflowTable(
+    DatasetsProcessesMixin,
+    CategoriesMixin,
+    SelectorStepsMixin,
+    CalibratorsMixin,
+    law.LocalWorkflow,
+    RemoteWorkflow,
+):
+    """
+    Workflow that creates a table of event yields per selector step and process from the merged
+    cutflow histograms of the variable "event". Each branch handles one category and writes the
+    formatted table as text as well as the nominal yields as json.
+    """
+
+    sandbox = dev_sandbox(law.config.get("analysis", "default_columnar_sandbox"))
+
+    table_format = luigi.Parameter(
+        default="fancy_grid",
+        significant=False,
+        description="format of the yield table; accepts all formats of the tabulate package; default: fancy_grid. "
+        "See https://github.com/astanin/python-tabulate/blob/master/README.md?plain=1#L147",
+    )
+    number_format = luigi.Parameter(
+        default="pdg",
+        significant=False,
+        description="rounding format of each number in the yield table; accepts all formats "
+        "understood by scinum.Number.str(), e.g. 'pdg', 'publication', '%%.1f' or an integer "
+        "(number of signficant digits); default: pdg",
+    )
+    skip_uncertainties = luigi.BoolParameter(
+        default=False,
+        significant=False,
+        description="when True, uncertainties are not displayed in the table; default: False",
+    )
+    normalize_yields = luigi.ChoiceParameter(
+        choices=(
+            law.NO_STR, "per_process", "per_step", "all",
+            "per_process_100", "per_step_100", "all_100",
+        ),
+        default=law.NO_STR,
+        significant=False,
+        description="string parameter to define the normalization of the yields; "
+        "choices: '', per_process, per_step, all; append _100 to express as percentage; empty default",
+    )
+    output_suffix = luigi.Parameter(
+        default=law.NO_STR,
+        description="Adds a suffix to the output name of the yields table; empty default",
+    )
+
+    selector_steps_order_sensitive = True
+
+    # upstream requirements
+    reqs = Requirements(
+        RemoteWorkflow.reqs,
+        MergeCutflowHistograms=MergeCutflowHistograms,
+    )
+
+    def create_branch_map(self):
+        """
+        Return one branch per requested category.
+
+        :raises Exception: When no category is set.
+        """
+        # one category per branch
+        if not self.categories:
+            raise Exception(
+                f"{self.__class__.__name__} task cannot build branch map when no category is "
+                "set",
+            )
+
+        return list(self.categories)
+
+    def workflow_requires(self):
+        """
+        Require the merged cutflow histograms of the variable "event" for every dataset.
+        """
+        reqs = super().workflow_requires()
+
+        reqs["hists"] = [
+            self.reqs.MergeCutflowHistograms.req(
+                self,
+                dataset=d,
+                variables=("event",),
+                _exclude={"branches"},
+            )
+            for d in self.datasets
+        ]
+        return reqs
+
+    def requires(self):
+        """
+        Require, per dataset name, branch 0 of the merged cutflow histograms of "event".
+        """
+        return {
+            d: self.reqs.MergeCutflowHistograms.req(
+                self,
+                branch=0,
+                dataset=d,
+                variables=("event",),
+            )
+            for d in self.datasets
+        }
+
+    @classmethod
+    def resolve_param_values(cls, params):
+        """
+        Resolve parameter values and turn an integer-like *number_format* into an int.
+        """
+        params = super().resolve_param_values(params)
+
+        if "number_format" in params and try_int(params["number_format"]):
+            # convert 'number_format' in integer if possible
+            params["number_format"] = int(params["number_format"])
+
+        return params
+
+    def output(self):
+        """
+        Return the text target of the table and the json target of the raw yields.
+
+        The branch data, i.e. the category name of the branch, and the optional output suffix
+        are part of both file names.
+        """
+        suffix = ""
+        if self.output_suffix and self.output_suffix != law.NO_STR:
+            suffix = f"__{self.output_suffix}"
+
+        return {
+            "table": self.target(f"table__proc_{self.processes_repr}__steps_{self.branch_data}{suffix}.txt"),
+            "yields": self.target(f"yields__proc_{self.processes_repr}__steps_{self.branch_data}{suffix}.json"),
+        }
+
+    @law.decorator.log
+    def run(self):
+        """
+        Sum the cutflow histograms per process, read out the yield of every selector step,
+        optionally normalize the yields, and publish and store the resulting table.
+
+        :raises Exception: When a histogram has more than one shift bin or when no histogram
+            matches the requested processes.
+        """
+        import hist
+        from tabulate import tabulate
+
+        inputs = self.input()
+        outputs = self.output()
+
+        category_inst = self.config_inst.get_category(self.branch_data)
+        leaf_category_insts = category_inst.get_leaf_categories() or [category_inst]
+        process_insts = list(map(self.config_inst.get_process, self.processes))
+        sub_process_insts = {
+            proc: [sub for sub, _, _ in proc.walk_processes(include_self=True)]
+            for proc in process_insts
+        }
+
+        # histogram data per process
+        hists = {}
+
+        with self.publish_step(f"Creating cutflow table in {category_inst.name}"):
+            for dataset, inp in inputs.items():
+                dataset_inst = self.config_inst.get_dataset(dataset)
+
+                # load the histogram of the variable named "event"
+                h_in = inp["hists"]["event"].load(formatter="pickle")
+
+                # sanity checks
+                n_shifts = len(h_in.axes["shift"])
+                if n_shifts != 1:
+                    raise Exception(f"shift axis is supposed to only contain 1 bin, found {n_shifts}")
+
+                # loop and extract one histogram per process
+                for process_inst in process_insts:
+                    # skip when the dataset is already known to not contain any sub process
+                    if not any(map(dataset_inst.has_process, sub_process_insts[process_inst])):
+                        continue
+
+                    # work on a copy
+                    h = h_in.copy()
+
+                    # axis selections
+                    h = h[{
+                        "process": [
+                            hist.loc(p.id)
+                            for p in sub_process_insts[process_inst]
+                            if p.id in h.axes["process"]
+                        ],
+                        "category": [
+                            hist.loc(c.id)
+                            for c in leaf_category_insts
+                            if c.id in h.axes["category"]
+                        ],
+                    }]
+
+                    # axis reductions
+                    h = h[{"shift": sum, "process": sum, "category": sum, "event": sum}]
+
+                    # add the histogram
+                    if process_inst in hists:
+                        hists[process_inst] += h
+                    else:
+                        hists[process_inst] = h
+
+            # there should be hists to plot
+            if not hists:
+                raise Exception("no histograms found to plot")
+
+            # sort hists by process order
+            hists = OrderedDict(
+                (process_inst, hists[process_inst])
+                for process_inst in sorted(hists, key=process_insts.index)
+            )
+
+            yields, processes = defaultdict(list), []
+
+            # read out yields per step and per process
+            for process_inst, h in hists.items():
+                processes.append(process_inst)
+
+                for step in self.selector_steps:
+                    h_step = h[{"step": [step]}]
+                    h_step = h_step[{"step": sum}]
+                    value = Number(h_step.value)
+                    if not self.skip_uncertainties:
+                        # set a unique uncertainty name for correct propagation below
+                        value.set_uncertainty(
+                            f"mcstat_{process_inst.name}_{step}",
+                            np.sqrt(h_step.variance),
+                        )
+                    yields[step].append(value)
+
+            # obtain normalization factors; the "_100" variants scale the factor by 0.01 so that
+            # the normalized yields are expressed in percent
+            norm_factors = 0.01 if "100" in self.normalize_yields else 1
+            if self.normalize_yields.startswith("all"):
+                # prefix match so that "all_100" is normalized as well
+                norm_factors *= sum(
+                    sum(step_yields)
+                    for step_yields in yields.values()
+                )
+            elif self.normalize_yields.startswith("per_process"):
+                # every step holds one entry per process, in the order of *processes*
+                norm_factors = [
+                    norm_factors * sum(yields[step][i] for step in yields)
+                    for i in range(len(processes))
+                ]
+            elif self.normalize_yields.startswith("per_step"):
+                norm_factors = {
+                    step: norm_factors * sum(step_yields)
+                    for step, step_yields in yields.items()
+                }
+
+            # initialize dicts
+            yields_str = defaultdict(list, {"Process": [proc.label for proc in processes]})
+            raw_yields = defaultdict(dict, {})
+
+            # apply normalization and format
+            for step, step_yields in yields.items():
+                for i, value in enumerate(step_yields):
+                    # get correct norm factor per step and process
+                    if self.normalize_yields.startswith("per_process"):
+                        norm_factor = norm_factors[i]
+                    elif self.normalize_yields.startswith("per_step"):
+                        norm_factor = norm_factors[step]
+                    else:
+                        norm_factor = norm_factors
+
+                    normalized = value / norm_factor
+                    raw_yields[step][processes[i].name] = normalized.nominal
+
+                    # format yields into strings
+                    yield_str = normalized.str(
+                        combine_uncs="all",
+                        format=self.number_format,
+                        style="latex" if "latex" in self.table_format else "plain",
+                    )
+                    if "latex" in self.table_format:
+                        yield_str = f"${yield_str}$"
+                    yields_str[step].append(yield_str)
+
+            # create, print and save the yield table
+            yield_table = tabulate(yields_str, headers="keys", tablefmt=self.table_format)
+            self.publish_message(yield_table)
+
+            outputs["table"].dump(yield_table, formatter="text")
+            outputs["yields"].dump(raw_yields, formatter="json")
+
diff --git a/columnflow/tasks/cutflow.py b/columnflow/tasks/cutflow.py
index a75f0721d..5c16dbee7 100644
--- a/columnflow/tasks/cutflow.py
+++ b/columnflow/tasks/cutflow.py
@@ -15,15 +15,20 @@
Requirements, AnalysisTask, DatasetTask, ShiftTask, wrapper_factory,
)
from columnflow.tasks.framework.mixins import (
- CalibratorsMixin, SelectorStepsMixin, VariablesMixin, CategoriesMixin, ChunkedIOMixin,
+ CalibratorsMixin, SelectorStepsMixin, VariablesMixin, CategoriesMixin, ChunkedIOMixin, MergeHistogramMixin
)
from columnflow.tasks.framework.plotting import (
PlotBase, PlotBase1D, PlotBase2D, ProcessPlotSettingMixin, VariablePlotSettingMixin,
)
from columnflow.tasks.framework.decorators import view_output_plots
from columnflow.tasks.framework.remote import RemoteWorkflow
-from columnflow.tasks.selection import MergeSelectionMasks
-from columnflow.util import DotDict, dev_sandbox
+from columnflow.tasks.external import GetDatasetLFNs
+from columnflow.tasks.selection import SelectEvents
+from columnflow.tasks.calibration import CalibrateEvents
+from columnflow.production import Producer
+from columnflow.util import DotDict, dev_sandbox, maybe_import
+
+np = maybe_import("numpy")
class CreateCutflowHistograms(
@@ -46,32 +51,64 @@ class CreateCutflowHistograms(
# upstream requirements
reqs = Requirements(
RemoteWorkflow.reqs,
- MergeSelectionMasks=MergeSelectionMasks,
+ GetDatasetLFNs=GetDatasetLFNs,
+ CalibrateEvents=CalibrateEvents,
+ SelectEvents=SelectEvents,
)
+    def __init__(self, *args, **kwargs):
+        """
+        Initialize the task and, for MC datasets, the producer of normalization weights.
+        """
+        super().__init__(*args, **kwargs)
+
+        # only MC datasets get a normalization weight producer, otherwise it stays unset
+        self.norm_weight_producer = None
+        if not self.dataset_inst.is_mc:
+            return
+
+        # instantiate the producer class registered as "normalization_weights"
+        producer_cls = Producer.get_cls("normalization_weights")
+        self.norm_weight_producer = producer_cls(inst_dict=self.get_producer_kwargs(self))
+
# strategy for handling missing source columns when adding aliases on event chunks
missing_column_alias_strategy = "original"
- def create_branch_map(self):
- # dummy branch map
- return [None]
-
def workflow_requires(self):
reqs = super().workflow_requires()
+ reqs["lfns"] = self.reqs.GetDatasetLFNs.req(self)
+ if not self.pilot:
+ reqs["calibrations"] = [
+ self.reqs.CalibrateEvents.req(self, calibrator=calibrator_inst.cls_name)
+ for calibrator_inst in self.calibrator_insts
+ if calibrator_inst.produced_columns
+ ]
+ reqs["selection"] = self.reqs.SelectEvents.req(self)
+ else:
+ # pass-through pilot workflow requirements of upstream task
+ t = self.reqs.SelectEvents.req(self)
+ reqs = law.util.merge_dicts(reqs, t.workflow_requires(), inplace=True)
- reqs["selection"] = self.reqs.MergeSelectionMasks.req(self, tree_index=0, _exclude={"branches"})
+ if self.dataset_inst.is_mc:
+ reqs["normalization"] = self.norm_weight_producer.run_requires()
return reqs
def requires(self):
- return {
- "selection": self.reqs.MergeSelectionMasks.req(self, tree_index=0, branch=0),
+ reqs = {
+ "lfns": self.reqs.GetDatasetLFNs.req(self),
+ "calibrations": [
+ self.reqs.CalibrateEvents.req(self, calibrator=calibrator_inst.cls_name)
+ for calibrator_inst in self.calibrator_insts
+ if calibrator_inst.produced_columns
+ ],
+ "selection": self.reqs.SelectEvents.req(self),
}
+ if self.dataset_inst.is_mc:
+ reqs["normalization"] = self.norm_weight_producer.run_requires()
+
+ return reqs
+
+ # TODO: CreateHistograms has a @MergeReducedEventsUser.maybe_dummy here
def output(self):
return {
- var: self.target(f"cutflow_hist__{var}.pickle")
- for var in self.variables
+ "hists": self.target(f"histograms__vars_{self.variables_repr}__{self.branch}.pickle"),
}
@law.decorator.log
@@ -81,10 +118,11 @@ def run(self):
import hist
import numpy as np
import awkward as ak
- from columnflow.columnar_util import Route, add_ak_aliases
+ from columnflow.columnar_util import Route, add_ak_aliases, mandatory_coffea_columns, update_ak_array
# prepare inputs and outputs
inputs = self.input()
+ lfn_task = self.requires()["lfns"]
# create a temp dir for saving intermediate files
tmp_dir = law.LocalDirectoryTarget(is_tmp=True)
@@ -93,10 +131,17 @@ def run(self):
# get shift dependent aliases
aliases = self.local_shift_inst.x("column_aliases", {})
+ # setup the normalization weights producer
+ if self.dataset_inst.is_mc:
+ self.norm_weight_producer.run_setup(
+ self.requires()["normalization"],
+ self.input()["normalization"],
+ )
+
# define columns that need to be read
read_columns = {"category_ids", "process_id"} | set(aliases.values())
if self.dataset_inst.is_mc:
- read_columns |= {"normalization_weight"}
+ read_columns |= self.norm_weight_producer.used_columns
read_columns = {Route(c) for c in read_columns}
# define steps
@@ -120,7 +165,9 @@ def run(self):
expressions[variable_inst.name] = expr
# prepare columns to load
- load_columns = {("events" + route) for route in read_columns} | {Route("steps.*")}
+ load_columns = read_columns | set(mandatory_coffea_columns)
+ load_nano_columns = {("events" + route) for route in read_columns} | set(mandatory_coffea_columns)
+ load_sel_columns = {Route("steps.*")}
# prepare histograms
histograms = {}
@@ -147,16 +194,35 @@ def prepare_hists(steps):
# enable weights and store it
histograms[var_key] = h.Weight()
- for arr, pos in self.iter_chunked_io(
- inputs["selection"]["masks"].path,
- source_type="awkward_parquet",
- read_columns=load_columns,
+ # let the lfn_task prepare the nano file (basically determine a good pfn)
+ [(lfn_index, input_file)] = lfn_task.iter_nano_files(self)
+
+ # open the input file with uproot
+ with self.publish_step("load and open ..."):
+ nano_file = input_file.load(formatter="uproot")
+
+ input_paths = [nano_file]
+ input_paths.append(inputs["selection"]["results"].path)
+ input_paths.extend([inp["columns"].path for inp in inputs["calibrations"]])
+ if self.selector_inst.produced_columns:
+ input_paths.append(inputs["selection"]["columns"].path)
+
+ for (events, sel, *diffs), pos in self.iter_chunked_io(
+ input_paths,
+ source_type=["coffea_root"] + (len(input_paths) - 1) * ["awkward_parquet"],
+ read_columns=[load_nano_columns, load_sel_columns] + (len(input_paths) - 2) * [load_columns],
):
- events = arr.events
+
+ # add the calibrated diffs and potentially new columns
+ events = update_ak_array(events, *diffs)
+
+ # add normalization weight
+ if self.dataset_inst.is_mc:
+ events = self.norm_weight_producer(events)
# overwrite steps if not defined yet
- if not steps:
- steps = arr.steps.fields
+ if steps == self.selector_steps_default:
+ steps = sel.steps.fields
# prepare histograms and exprepssions once
if not histograms:
@@ -203,12 +269,12 @@ def get_point(mask=Ellipsis):
# fill all other steps
mask = True
for step in steps:
- if step not in arr.steps.fields:
+ if step not in sel.steps.fields:
raise ValueError(
f"step '{step}' is not defined by selector {self.selector}",
)
# incrementally update the mask and fill the point
- mask = mask & arr.steps[step]
+ mask = mask & sel.steps[step]
fill_kwargs = get_point(mask)
arrays = ak.flatten(ak.cartesian(fill_kwargs))
histograms[var_key].fill(
@@ -217,8 +283,7 @@ def get_point(mask=Ellipsis):
)
# dump the histograms
- for var_key in histograms.keys():
- self.output()[var_key].dump(histograms[var_key], formatter="pickle")
+ self.output()["hists"].dump(histograms, formatter="pickle")
CreateCutflowHistogramsWrapper = wrapper_factory(
@@ -228,6 +293,30 @@ def get_point(mask=Ellipsis):
)
+class MergeCutflowHistograms(
+    MergeHistogramMixin,
+    SelectorStepsMixin,
+    CalibratorsMixin,
+    DatasetTask,
+    RemoteWorkflow,
+):
+    """
+    Task combining :py:class:`MergeHistogramMixin` with the cutflow histogram creation: the
+    "CreateHistograms" requirement is set to :py:class:`CreateCutflowHistograms`.
+    """
+
+    sandbox = dev_sandbox(law.config.get("analysis", "default_columnar_sandbox"))
+
+    # upstream requirements
+    reqs = Requirements(
+        RemoteWorkflow.reqs,
+        CreateHistograms=CreateCutflowHistograms,
+    )
+
+    # flag read by SelectorStepsMixin; when set, the selector steps are not sorted
+    selector_steps_order_sensitive = True
+
+MergeCutflowHistogramsWrapper = wrapper_factory(
+ base_cls=AnalysisTask,
+ require_cls=MergeCutflowHistograms,
+ enable=["configs", "skip_configs", "datasets", "skip_datasets", "shifts", "skip_shifts"],
+)
+
+
class PlotCutflowBase(
SelectorStepsMixin,
CategoriesMixin,
@@ -246,7 +335,7 @@ class PlotCutflowBase(
# upstream requirements
reqs = Requirements(
RemoteWorkflow.reqs,
- CreateCutflowHistograms=CreateCutflowHistograms,
+ MergeCutflowHistograms=MergeCutflowHistograms,
)
def store_parts(self):
@@ -273,6 +362,18 @@ class PlotCutflow(
f"default: '{CreateCutflowHistograms.default_variables[0]}'",
)
+ relative = luigi.BoolParameter(
+ default=False,
+ significant=False,
+ description="plot cutflow as fraction of total at each step",
+ )
+
+ skip_initial = luigi.BoolParameter(
+ default=False,
+ significant=False,
+ description="do not plot the event selection before applying any steps",
+ )
+
# upstream requirements
reqs = Requirements(
PlotCutflowBase.reqs,
@@ -293,7 +394,7 @@ def workflow_requires(self):
reqs = super().workflow_requires()
reqs["hists"] = [
- self.reqs.CreateCutflowHistograms.req(
+ self.reqs.MergeCutflowHistograms.req(
self,
dataset=d,
variables=(self.variable,),
@@ -305,7 +406,7 @@ def workflow_requires(self):
def requires(self):
return {
- d: self.reqs.CreateCutflowHistograms.req(
+ d: self.reqs.MergeCutflowHistograms.req(
self,
branch=0,
dataset=d,
@@ -340,7 +441,7 @@ def run(self):
with self.publish_step(f"plotting cutflow in {category_inst.name}"):
for dataset, inp in self.input().items():
dataset_inst = self.config_inst.get_dataset(dataset)
- h_in = inp[self.variable].load(formatter="pickle")
+ h_in = inp["hists"][self.variable].load(formatter="pickle")
# sanity checks
n_shifts = len(h_in.axes["shift"])
@@ -373,6 +474,9 @@ def run(self):
# axis reductions
h = h[{"process": sum, "category": sum, self.variable: sum}]
+ if self.skip_initial:
+ h = h[{"step": self.selector_steps}]
+
# add the histogram
if process_inst in hists:
hists[process_inst] += h
@@ -383,9 +487,11 @@ def run(self):
if not hists:
raise Exception("no histograms found to plot")
+ total = sum(hists.values()).values() if self.relative else np.ones((len(self.selector_steps) + 1, 1))
+
# sort hists by process order
hists = OrderedDict(
- (process_inst.copy_shallow(), hists[process_inst])
+ (process_inst.copy_shallow(), hists[process_inst] / total)
for process_inst in sorted(hists, key=process_insts.index)
)
@@ -463,14 +569,14 @@ def create_branch_map(self):
def workflow_requires(self):
reqs = super().workflow_requires()
reqs["hists"] = [
- self.reqs.CreateCutflowHistograms.req(self, dataset=d, _exclude={"branches"})
+ self.reqs.MergeCutflowHistograms.req(self, dataset=d, _exclude={"branches"})
for d in self.datasets
]
return reqs
def requires(self):
return {
- d: self.reqs.CreateCutflowHistograms.req(self, dataset=d, branch=0)
+ d: self.reqs.MergeCutflowHistograms.req(self, dataset=d, branch=0)
for d in self.datasets
}
@@ -507,7 +613,7 @@ def run(self):
with self.publish_step(f"plotting {self.branch_data.variable} in {category_inst.name}"):
for dataset, inp in self.input().items():
dataset_inst = self.config_inst.get_dataset(dataset)
- h_in = inp[self.branch_data.variable].load(formatter="pickle")
+ h_in = inp["hists"][self.branch_data.variable].load(formatter="pickle")
# sanity checks
n_shifts = len(h_in.axes["shift"])
diff --git a/columnflow/tasks/external.py b/columnflow/tasks/external.py
index a6a875400..bf12dff8d 100644
--- a/columnflow/tasks/external.py
+++ b/columnflow/tasks/external.py
@@ -10,7 +10,7 @@
import time
import shutil
import subprocess
-
+import glob
import luigi
import law
import order as od
@@ -103,7 +103,10 @@ def run(self):
lfns = []
for key in sorted(self.dataset_info_inst.keys):
self.logger.info(f"get lfns for dataset key {key} {msg}")
- lfns.extend(get_dataset_lfns(self.dataset_inst, self.global_shift_inst, key))
+ if msg=='via dasgoclient':
+ lfns.extend(get_dataset_lfns(self.dataset_inst, self.global_shift_inst, key))
+ else:
+ lfns.extend(get_dataset_lfns(self,key))
if self.validate and len(lfns) != self.dataset_info_inst.n_files:
raise ValueError(
@@ -117,6 +120,19 @@ def run(self):
tmp.dump(lfns, indent=4, formatter="json")
self.transfer(tmp)
+    def custom_get_dataset_lfns(
+        self,
+        dataset_key: str,
+    ) -> list[str]:
+        """
+        Return the paths of all root files found for a custom dataset.
+
+        The base directory is read from the option "base" of the law config section
+        "custom_pnfs_fs". Files are searched two directory levels below the concatenation of
+        that base directory and *dataset_key*.
+
+        :param dataset_key: Key of the dataset, appended verbatim to the base directory.
+        :return: Sorted list of the matching file paths.
+        """
+        base = law.config.get_expanded("custom_pnfs_fs", "base")
+
+        # glob returns its matches in arbitrary order, so sort them to make the position of
+        # each file in the returned list reproducible between calls
+        return sorted(glob.glob(f"{base}{dataset_key}/*/*/*.root"))
+
+
def get_dataset_lfns_dasgoclient(
self,
dataset_inst: od.Dataset,
diff --git a/columnflow/tasks/framework/mixins.py b/columnflow/tasks/framework/mixins.py
index 0861cc0d3..b3604dee8 100644
--- a/columnflow/tasks/framework/mixins.py
+++ b/columnflow/tasks/framework/mixins.py
@@ -586,10 +586,12 @@ class SelectorStepsMixin(SelectorMixin):
parameter for this task.
"""
+ selector_steps_default = ('_DEFAULT',)
+
selector_steps = law.CSVParameter(
- default=(),
- description="a subset of steps of the selector to apply; uses all steps when empty; "
- "empty default",
+ default=selector_steps_default,
+ description="a subset of steps of the selector to apply; uses all steps when None; "
+ "None default",
brace_expand=True,
parse_empty=True,
)
@@ -628,7 +630,7 @@ def resolve_param_values(cls, params: dict[str, Any]) -> dict[str, Any]:
)
# sort selector steps when the order does not matter
- if not cls.selector_steps_order_sensitive and "selector_steps" in params:
+ if "selector_steps" in params and not cls.selector_steps_order_sensitive:
params["selector_steps"] = tuple(sorted(params["selector_steps"]))
return params
@@ -665,10 +667,10 @@ def store_parts(self) -> law.util.InsertableDict:
parts = super().store_parts()
steps = self.selector_steps
- if not self.selector_steps_order_sensitive:
- steps = sorted(steps)
- if steps:
- parts["selector"] += "__steps_" + "_".join(steps)
+ if steps != self.selector_steps_default:
+ if not self.selector_steps_order_sensitive:
+ steps = sorted(steps)
+ parts["selector"] += ("__steps_" + "_".join(steps) if steps else "__inclusive")
return parts
@@ -2226,3 +2228,81 @@ def iter_chunked_io(self, *args, **kwargs):
# eager, overly cautious gc
del handler
gc.collect()
+
+
+class MergeHistogramMixin(
+    VariablesMixin,
+    law.LocalWorkflow,
+):
+    """
+    Mixin for workflows that merge the histograms produced by all branches of the task
+    registered as ``reqs.CreateHistograms`` into one pickle file per variable.
+    """
+
+    only_missing = luigi.BoolParameter(
+        default=False,
+        description="when True, identify missing variables first and only require histograms of "
+        "missing ones; default: False",
+    )
+    remove_previous = luigi.BoolParameter(
+        default=False,
+        significant=False,
+        description="when True, remove particlar input histograms after merging; default: False",
+    )
+
+    def create_branch_map(self):
+        """
+        Return a branch map with a single dummy branch.
+        """
+        # create a dummy branch map so that this task could be submitted as a job
+        return {0: None}
+
+    def workflow_requires(self):
+        """
+        Add the requirements of the branch task under the key "hists".
+        """
+        reqs = super().workflow_requires()
+
+        reqs["hists"] = self.as_branch().requires()
+
+        return reqs
+
+    def requires(self):
+        """
+        Require the histogram creation workflow for all variables, or only for those whose
+        merged output does not exist yet when *only_missing* is set.
+
+        :return: The required workflow, or an empty list when no variable is left to produce.
+        """
+        # optional dynamic behavior: determine not yet created variables and require only those
+        prefer_cli = {"variables"}
+        variables = self.variables
+        if self.only_missing:
+            prefer_cli.clear()
+            # output() returns a dict; the file collection providing count() is stored under
+            # its "hists" key
+            missing = self.output()["hists"].count(existing=False, keys=True)[1]
+            variables = tuple(sorted(missing, key=variables.index))
+
+        if not variables:
+            return []
+
+        return self.reqs.CreateHistograms.req(
+            self,
+            branch=-1,
+            variables=tuple(variables),
+            _exclude={"branches"},
+            _prefer_cli=prefer_cli,
+        )
+
+    def output(self):
+        """
+        Return a dict whose "hists" entry is a collection with one pickle target per variable.
+        """
+        return {"hists": law.SiblingFileCollection({
+            variable_name: self.target(f"hist__{variable_name}.pickle")
+            for variable_name in self.variables
+        })}
+
+    @law.decorator.log
+    def run(self):
+        """
+        Load the histograms of all inputs, sum them per variable and store one file per
+        variable; optionally remove the inputs afterwards.
+        """
+        # prepare inputs and outputs
+        inputs = self.input()["collection"]
+        outputs = self.output()
+
+        # load input histograms
+        hists = [
+            inp["hists"].load(formatter="pickle")
+            for inp in self.iter_progress(inputs.targets.values(), len(inputs), reach=(0, 50))
+        ]
+
+        # create a separate file per output variable
+        variable_names = list(hists[0].keys())
+        for variable_name in self.iter_progress(variable_names, len(variable_names), reach=(50, 100)):
+            self.publish_message(f"merging histograms for '{variable_name}'")
+
+            variable_hists = [h[variable_name] for h in hists]
+            # start the sum from a copy so that the first loaded histogram is not modified
+            merged = sum(variable_hists[1:], variable_hists[0].copy())
+            outputs["hists"][variable_name].dump(merged, formatter="pickle")
+
+        # optionally remove inputs
+        if self.remove_previous:
+            inputs.remove()
diff --git a/columnflow/tasks/framework/parameters.py b/columnflow/tasks/framework/parameters.py
index 405b786a8..2fedcd21e 100644
--- a/columnflow/tasks/framework/parameters.py
+++ b/columnflow/tasks/framework/parameters.py
@@ -8,7 +8,8 @@
import law
-from columnflow.util import try_float, DotDict
+from columnflow.util import try_float, try_complex, DotDict
+from columnflow.types import Iterable
class SettingsParameter(law.CSVParameter):
@@ -26,21 +27,35 @@ class SettingsParameter(law.CSVParameter):
p.serialize({"param1": 2, "param2": False})
=> "param1=2,param2=False"
"""
+ settings_delimiter = "="
+ tuple_delimiter = ";"
@classmethod
def parse_setting(cls, setting: str) -> tuple[str, float | bool | str]:
- pair = setting.split("=", 1)
+ pair = setting.split(cls.settings_delimiter, 1)
key, value = pair if len(pair) == 2 else (pair[0], "True")
+ if ";" in value:
+ # split by ";" and parse each value
+ value = tuple(cls.parse_value(v) for v in value.split(cls.tuple_delimiter))
+ else:
+ value = cls.parse_value(value)
+ return (key, value)
+
+ @classmethod
+ def parse_value(cls, value):
if try_float(value):
value = float(value)
+ elif try_complex(value):
+ value = complex(value)
elif value.lower() == "true":
value = True
elif value.lower() == "false":
value = False
- return (key, value)
+ return value
@classmethod
- def serialize_setting(cls, name: str, value: str) -> str:
+ def serialize_setting(cls, name: str, value: str | Iterable[str]) -> str:
+ value = ";".join(str(v) for v in law.util.make_tuple(value))
return f"{name}={value}"
def __init__(self, **kwargs):
@@ -103,7 +118,6 @@ def parse(self, inp):
)
# next, merge dicts
outputs = law.util.merge_dicts(*outputs, deep=True)
-
return outputs
def serialize(self, value):
diff --git a/columnflow/tasks/histograms.py b/columnflow/tasks/histograms.py
index 322500249..da93b8db0 100644
--- a/columnflow/tasks/histograms.py
+++ b/columnflow/tasks/histograms.py
@@ -12,7 +12,7 @@
from columnflow.tasks.framework.base import Requirements, AnalysisTask, DatasetTask, wrapper_factory
from columnflow.tasks.framework.mixins import (
CalibratorsMixin, SelectorStepsMixin, ProducersMixin, MLModelsMixin, VariablesMixin,
- ShiftSourcesMixin, WeightProducerMixin, ChunkedIOMixin,
+ ShiftSourcesMixin, WeightProducerMixin, ChunkedIOMixin, MergeHistogramMixin
)
from columnflow.tasks.framework.remote import RemoteWorkflow
from columnflow.tasks.reduction import MergeReducedEventsUser, MergeReducedEvents
@@ -268,27 +268,15 @@ def expr(events, *args, **kwargs):
class MergeHistograms(
- VariablesMixin,
+ MergeHistogramMixin,
WeightProducerMixin,
MLModelsMixin,
ProducersMixin,
SelectorStepsMixin,
CalibratorsMixin,
DatasetTask,
- law.LocalWorkflow,
RemoteWorkflow,
):
- only_missing = luigi.BoolParameter(
- default=False,
- description="when True, identify missing variables first and only require histograms of "
- "missing ones; default: False",
- )
- remove_previous = luigi.BoolParameter(
- default=False,
- significant=False,
- description="when True, remove particlar input histograms after merging; default: False",
- )
-
sandbox = dev_sandbox(law.config.get("analysis", "default_columnar_sandbox"))
# upstream requirements
@@ -297,68 +285,6 @@ class MergeHistograms(
CreateHistograms=CreateHistograms,
)
- def create_branch_map(self):
- # create a dummy branch map so that this task could be submitted as a job
- return {0: None}
-
- def workflow_requires(self):
- reqs = super().workflow_requires()
-
- reqs["hists"] = self.as_branch().requires()
-
- return reqs
-
- def requires(self):
- # optional dynamic behavior: determine not yet created variables and require only those
- prefer_cli = {"variables"}
- variables = self.variables
- if self.only_missing:
- prefer_cli.clear()
- missing = self.output().count(existing=False, keys=True)[1]
- variables = tuple(sorted(missing, key=variables.index))
-
- if not variables:
- return []
-
- return self.reqs.CreateHistograms.req(
- self,
- branch=-1,
- variables=tuple(variables),
- _exclude={"branches"},
- _prefer_cli=prefer_cli,
- )
-
- def output(self):
- return {"hists": law.SiblingFileCollection({
- variable_name: self.target(f"hist__{variable_name}.pickle")
- for variable_name in self.variables
- })}
-
- @law.decorator.log
- def run(self):
- # preare inputs and outputs
- inputs = self.input()["collection"]
- outputs = self.output()
-
- # load input histograms
- hists = [
- inp["hists"].load(formatter="pickle")
- for inp in self.iter_progress(inputs.targets.values(), len(inputs), reach=(0, 50))
- ]
-
- # create a separate file per output variable
- variable_names = list(hists[0].keys())
- for variable_name in self.iter_progress(variable_names, len(variable_names), reach=(50, 100)):
- self.publish_message(f"merging histograms for '{variable_name}'")
-
- variable_hists = [h[variable_name] for h in hists]
- merged = sum(variable_hists[1:], variable_hists[0].copy())
- outputs["hists"][variable_name].dump(merged, formatter="pickle")
-
- # optionally remove inputs
- if self.remove_previous:
- inputs.remove()
-
MergeHistogramsWrapper = wrapper_factory(
base_cls=AnalysisTask,
diff --git a/columnflow/tasks/production.py b/columnflow/tasks/production.py
index 1bc430bee..a53e055bb 100644
--- a/columnflow/tasks/production.py
+++ b/columnflow/tasks/production.py
@@ -3,12 +3,13 @@
"""
Tasks related to producing new columns.
"""
+import itertools
import law
from columnflow.tasks.framework.base import Requirements, AnalysisTask, wrapper_factory
from columnflow.tasks.framework.mixins import (
- CalibratorsMixin, SelectorStepsMixin, ProducerMixin, ChunkedIOMixin,
+ CalibratorsMixin, SelectorStepsMixin, ProducerMixin, ChunkedIOMixin, ProducersMixin,
)
from columnflow.tasks.framework.remote import RemoteWorkflow
from columnflow.tasks.reduction import MergeReducedEventsUser, MergeReducedEvents
@@ -165,8 +166,25 @@ def run(self):
)
-ProduceColumnsWrapper = wrapper_factory(
+ProduceColumnsWrapperBase = wrapper_factory(
base_cls=AnalysisTask,
require_cls=ProduceColumns,
enable=["configs", "skip_configs", "datasets", "skip_datasets", "shifts", "skip_shifts"],
)
+ProduceColumnsWrapperBase.exclude_index = True
+
+
+class ProduceColumnsWrapper(
+ ProduceColumnsWrapperBase,
+ ProducersMixin,
+
+):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ # add the producer as an additional wrapper field
+ self.wrapper_fields.extend(["producer"])
+
+ combined_parameters = itertools.product(self.wrapper_parameters, self.producers)
+ combined_parameters = [params_tuple + (producer,) for params_tuple, producer in combined_parameters]
+ self.wrapper_parameters = combined_parameters
diff --git a/columnflow/tasks/reduction.py b/columnflow/tasks/reduction.py
index 87cb61559..813f79936 100644
--- a/columnflow/tasks/reduction.py
+++ b/columnflow/tasks/reduction.py
@@ -122,7 +122,7 @@ def run(self):
# define columns to read for the differently structured selection masks
read_sel_columns = set()
# open either selector steps of the full event selection mask
- read_sel_columns.add(Route("steps.*" if self.selector_steps else "event"))
+ read_sel_columns.add(Route("steps.*" if self.selector_steps and self.selector_steps != self.selector_steps_default else "event"))
# add object masks, depending on the columns to write
# (as object masks are dynamic and deeply nested, preload the meta info to access fields)
sel_results = inputs["selection"]["results"].load(formatter="dask_awkward")
@@ -183,7 +183,7 @@ def run(self):
)
# build the event mask
- if self.selector_steps:
+ if self.selector_steps and self.selector_steps != self.selector_steps_default:
# check if all steps are present
missing_steps = set(self.selector_steps) - set(sel.steps.fields)
if missing_steps:
diff --git a/columnflow/tasks/selection.py b/columnflow/tasks/selection.py
index 482043438..433e12efb 100644
--- a/columnflow/tasks/selection.py
+++ b/columnflow/tasks/selection.py
@@ -44,6 +44,14 @@ class SelectEvents(
# strategy for handling missing source columns when adding aliases on event chunks
missing_column_alias_strategy = "original"
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ # store the "veto_events" producer used to flag events to veto
+ self.veto_producer: Producer = Producer.get_cls("veto_events")(
+ inst_dict=self.get_producer_kwargs(self),
+ )
+
def workflow_requires(self):
reqs = super().workflow_requires()
@@ -63,6 +71,9 @@ def workflow_requires(self):
# add selector dependent requirements
reqs["selector"] = self.selector_inst.run_requires()
+ # add veto producer dependent requirements
+ reqs["veto"] = self.veto_producer.run_requires()
+
return reqs
def requires(self):
@@ -78,6 +89,9 @@ def requires(self):
# add selector dependent requirements
reqs["selector"] = self.selector_inst.run_requires()
+ # add veto producer dependent requirements
+ reqs["veto"] = self.veto_producer.run_requires()
+
return reqs
def output(self):
@@ -132,14 +146,19 @@ def run(self):
# get shift dependent aliases
aliases = self.local_shift_inst.x("column_aliases", {})
+ # setup the veto producer
+ self.veto_producer.run_setup(self.requires()["veto"], self.input()["veto"])
+
# define columns that need to be read
read_columns = set(map(Route, mandatory_coffea_columns))
read_columns |= self.selector_inst.used_columns
+ read_columns |= self.veto_producer.used_columns
read_columns |= set(map(Route, aliases.values()))
# define columns that will be written
write_columns = set(map(Route, mandatory_coffea_columns))
write_columns |= self.selector_inst.produced_columns
+ write_columns |= self.veto_producer.produced_columns
route_filter = RouteFilter(write_columns)
# let the lfn_task prepare the nano file (basically determine a good pfn)
@@ -172,6 +191,9 @@ def run(self):
# insert additional columns
events = update_ak_array(events, *cols)
+ # add veto
+ events = self.veto_producer(events, file=input_file)
+
# add aliases
events = add_ak_aliases(
events,
@@ -195,7 +217,8 @@ def run(self):
# optional check for finite values
if self.check_finite_output:
- self.raise_if_not_finite(results_array)
+ # ignore vetoed events when checking for finite values
+ self.raise_if_not_finite(results_array[~events.veto])
# save results as parquet via a thread in the same pool
chunk = tmp_dir.child(f"res_{lfn_index}_{pos.index}.parquet", type="f")
@@ -204,11 +227,16 @@ def run(self):
# remove columns
if write_columns:
+
+ # store veto in variable before filtering
+ veto = events.veto
+
events = route_filter(events)
# optional check for finite values
if self.check_finite_output:
- self.raise_if_not_finite(events)
+ # ignore vetoed events when checking for finite values
+ self.raise_if_not_finite(events[~veto])
# save additional columns as parquet via a thread in the same pool
chunk = tmp_dir.child(f"cols_{lfn_index}_{pos.index}.parquet", type="f")
diff --git a/columnflow/util.py b/columnflow/util.py
index 0dec79fea..48b6badec 100644
--- a/columnflow/util.py
+++ b/columnflow/util.py
@@ -10,7 +10,7 @@
"UNSET",
"maybe_import", "import_plt", "import_ROOT", "import_file", "create_random_name", "expand_path",
"real_path", "ensure_dir", "wget", "call_thread", "call_proc", "ensure_proxy", "dev_sandbox",
- "safe_div", "try_float", "try_int", "is_pattern", "is_regex", "pattern_matcher",
+ "safe_div", "try_float", "try_complex", "try_int", "is_pattern", "is_regex", "pattern_matcher",
"dict_add_strict", "get_source_code",
"DotDict", "MockModule", "FunctionArgs", "ClassPropertyDescriptor", "classproperty",
"DerivableMeta", "Derivable",
@@ -30,6 +30,7 @@
import multiprocessing.pool
from functools import wraps
from collections import OrderedDict
+from typing import Hashable, Iterable, Callable
import law
from law.util import InsertableDict # noqa
@@ -412,6 +413,17 @@ def try_float(f: Any) -> bool:
return False
+def try_complex(f: Any) -> bool:
+ """
+ Tests whether a value *f* can be converted to a complex number.
+ """
+ try:
+ complex(f)
+ return True
+ except (ValueError, TypeError):
+ return False
+
+
def try_int(i: Any) -> bool:
"""
Tests whether a value *i* can be converted to an integer.
@@ -521,6 +533,68 @@ def get_source_code(obj: Any, indent: str | int = None) -> str:
return code
+def call_once_on_config(include_hash=False):
+ """
+ Parametrized decorator to ensure that *func* is called only once per *config* (tracked via a config tag).
+ """
+ def outer(func):
+ def inner(config, *args, **kwargs):
+ tag = f"{func.__name__}_called"
+ if include_hash:
+ tag += f"_{func.__hash__()}"
+
+ if config.has_tag(tag):
+ return
+
+ config.add_tag(tag)
+ return func(config, *args, **kwargs)
+ return inner
+ return outer
+
+
+def four_vec(
+ collections: str | Iterable[str],
+ columns: str | Iterable[str] | None = None,
+ skip_defaults: bool = False,
+) -> set[str]:
+ """
+ Helper to quickly get a set of 4-vector component strings for all collections in *collections*.
+ Additional columns can be added with the optional *columns* parameter.
+
+ Example:
+
+ .. code-block:: python
+
+ four_vec("Jet", "jetId")
+ # -> {"Jet.pt", "Jet.eta", "Jet.phi", "Jet.mass", "Jet.jetId"}
+
+ four_vec({"Electron", "Muon"})
+ # -> {
+ "Electron.pt", "Electron.eta", "Electron.phi", "Electron.mass",
+ "Muon.pt", "Muon.eta", "Muon.phi", "Muon.mass",
+ }
+ """
+ # make sure *collections* is a set
+ collections = law.util.make_set(collections)
+
+ # transform *columns* to a set and add the default 4-vector components
+ columns = law.util.make_set(columns) if columns else set()
+ default_columns = {"pt", "eta", "phi", "mass"}
+ if not skip_defaults:
+ columns |= default_columns
+
+ outp = set(
+ f"{obj}.{var}"
+ for obj in collections
+ for var in columns
+ )
+
+ # manually remove MET eta and mass
+ outp = outp.difference({"MET.eta", "MET.mass"})
+
+ return outp
+
+
class DotDict(OrderedDict):
"""
Subclass of *OrderedDict* that provides read and write access to items via attributes by
diff --git a/create_analysis.sh b/create_analysis.sh
index 0bfd0b8ec..dd8a8ddd9 100755
--- a/create_analysis.sh
+++ b/create_analysis.sh
@@ -17,7 +17,7 @@ create_analysis() {
local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
local exec_dir="$( pwd )"
- local fetch_cf_branch="master"
+ local fetch_cf_branch="main"
local fetch_cmsdb_branch="master"
local debug="${CF_CREATE_ANALYSIS_DEBUG:-false}"
@@ -161,7 +161,7 @@ create_analysis() {
echo
query_input "cf_short_name" "Short name for environment variables, pre- and suffixes" "${cf_module_name}"
echo
- query_input "cf_analysis_flavor" "The flavor of the analysis to setup" "cms_minimal" "cms_minimal"
+ query_input "cf_analysis_flavor" "The flavor of the analysis to setup" "ghent_template" "cms_minimal,ghent_template"
echo
query_input "cf_use_ssh" "Use ssh for git submodules" "True" "True,False"
echo
@@ -203,8 +203,8 @@ create_analysis() {
rm -rf "${exec_dir}/.cf_analysis_setup"
mkdir -p "${exec_dir}/.cf_analysis_setup" || return "$?"
cd "${exec_dir}/.cf_analysis_setup"
- curl -L -s -k "https://github.com/columnflow/columnflow/tarball/${fetch_cf_branch}" | tar -xz || return "$?"
- mv columnflow-columnflow-*/"analysis_templates/${cf_analysis_flavor}" "${cf_analysis_base}" || return "$?"
+ curl -L -s -k "https://github.com/GhentAnalysis/columnflow/tarball/${fetch_cf_branch}" | tar -xz || return "$?"
+ mv GhentAnalysis-columnflow-*/"analysis_templates/${cf_analysis_flavor}" "${cf_analysis_base}" || return "$?"
cd "${cf_analysis_base}" || return "$?"
rm -rf "${exec_dir}/.cf_analysis_setup"
fi
@@ -255,18 +255,26 @@ create_analysis() {
echo_color cyan "setup submodules"
- local gh_prefix="https://github.com/"
+ local gh_prefix_github="https://github.com/"
+ local gh_prefix_gitlab="https://gitlab.cern.ch/"
+
+
+ $( str_lc "${cf_use_ssh}" ) && gh_prefix_github="git@github.com:"
+ $( str_lc "${cf_use_ssh}" ) && gh_prefix_gitlab="ssh://git@gitlab.cern.ch:7999/"
- $( str_lc "${cf_use_ssh}" ) && gh_prefix="git@github.com:"
mkdir -p modules
if ${debug}; then
ln -s "${this_dir}" modules/columnflow
else
- git submodule add -b "${fetch_cf_branch}" "${gh_prefix}columnflow/columnflow.git" modules/columnflow
+ git submodule add -b "${fetch_cf_branch}" "${gh_prefix_github}GhentAnalysis/columnflow.git" modules/columnflow
fi
if [ "${cf_analysis_flavor}" = "cms_minimal" ]; then
- git submodule add -b "${fetch_cmsdb_branch}" "${gh_prefix}uhh-cms/cmsdb.git" modules/cmsdb
+ git submodule add -b "${fetch_cmsdb_branch}" "${gh_prefix_gitlab}ghentanalysis/cmsdb.git" modules/cmsdb
+ fi
+ if [ "${cf_analysis_flavor}" = "ghent_template" ]; then
+ git submodule add -b "${fetch_cmsdb_branch}" "${gh_prefix_github}CMS-LUMI-POG/Normtags.git" modules/Normtags
+ git submodule add -b "${fetch_cmsdb_branch}" "${gh_prefix_gitlab}ghentanalysis/cmsdb.git" modules/cmsdb
fi
git submodule update --init --recursive
diff --git a/law.cfg b/law.cfg
index 617f33e99..0e6b23bd3 100644
--- a/law.cfg
+++ b/law.cfg
@@ -11,7 +11,9 @@ columnflow.tasks.union
columnflow.tasks.histograms
columnflow.tasks.plotting
columnflow.tasks.yields
-columnflow.tasks.cutflow
+columnflow.tasks.cmsGhent.cutflow
+columnflow.tasks.cmsGhent.config
+
[analysis]
@@ -20,7 +22,7 @@ default_analysis: columnflow.example_config.analysis_st.analysis_st
default_config: run2_pp_2018
default_dataset: st_tchannel_t
-production_modules: columnflow.production.{categories,processes,normalization}
+production_modules: columnflow.production.{categories,processes,normalization,veto}
calibration_modules: columnflow.calibration
selection_modules: columnflow.selection.{empty}
categorization_modules: columnflow.categorization
@@ -46,7 +48,7 @@ slurm_partition: $CF_SLURM_PARTITION
# ChunkedIOHandler defaults
chunked_io_chunk_size: 100000
chunked_io_pool_size: 2
-chunked_io_debug: False
+chunked_io_debug: True
# csv list of task families that inherit from ChunkedReaderMixin and whose output arrays should be
# checked (raising an exception) for non-finite values before saving them to disk
diff --git a/sandboxes/venv_lepton_mva.sh b/sandboxes/venv_lepton_mva.sh
new file mode 100644
index 000000000..aa9e74ba9
--- /dev/null
+++ b/sandboxes/venv_lepton_mva.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+# Script that sets up a virtual env in $CF_VENV_PATH.
+# For more info on functionality and parameters, see the generic setup script _setup_venv.sh.
+
+action() {
+ local shell_is_zsh=$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )
+ local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
+ local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
+
+ # set variables and source the generic venv setup
+ export CF_SANDBOX_FILE="${CF_SANDBOX_FILE:-${this_file}}"
+ export CF_VENV_NAME="$( basename "${this_file%.sh}" )"
+ export CF_VENV_REQUIREMENTS="${this_dir}/venv_lepton_mva.txt"
+
+ source "${CF_BASE}/sandboxes/_setup_venv.sh" "$@"
+}
+action "$@"
diff --git a/sandboxes/venv_lepton_mva.txt b/sandboxes/venv_lepton_mva.txt
new file mode 100644
index 000000000..44f9cfb16
--- /dev/null
+++ b/sandboxes/venv_lepton_mva.txt
@@ -0,0 +1,5 @@
+# version 1
+
+-r columnar.txt
+
+xgboost==2.0.2
diff --git a/sandboxes/venv_lepton_mva_dev.sh b/sandboxes/venv_lepton_mva_dev.sh
new file mode 100644
index 000000000..51f121991
--- /dev/null
+++ b/sandboxes/venv_lepton_mva_dev.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+# Script that sets up a virtual env in $CF_VENV_PATH.
+# For more info on functionality and parameters, see the generic setup script _setup_venv.sh.
+
+action() {
+ local shell_is_zsh=$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )
+ local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
+ local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
+
+ # set variables and source the generic venv setup
+ export CF_SANDBOX_FILE="${CF_SANDBOX_FILE:-${this_file}}"
+ export CF_VENV_NAME="$( basename "${this_file%.sh}" )"
+ export CF_VENV_REQUIREMENTS="${this_dir}/venv_lepton_mva.txt,${CF_BASE}/sandboxes/dev.txt"
+
+ source "${CF_BASE}/sandboxes/_setup_venv.sh" "$@"
+}
+action "$@"
diff --git a/setup.sh b/setup.sh
index 41a019b50..6a66936a8 100644
--- a/setup.sh
+++ b/setup.sh
@@ -583,6 +583,7 @@ cf_setup_software_stack() {
2>&1 "${CF_CONDA_BASE}/bin/micromamba" shell hook -y --prefix="$PWD" &> micromamba.sh || return "$?"
# make the setup file relocatable
sed -i -r "s|${CF_CONDA_BASE}|\$\{MAMBA_ROOT_PREFIX\}|" "micromamba.sh" || return "$?"
+ sed -i -r "6 s|/ada_mnt/ada||" "micromamba.sh" || return "$?"
mv "micromamba.sh" "${CF_CONDA_BASE}/etc/profile.d/micromamba.sh"
cat << EOF > "${CF_CONDA_BASE}/.mambarc"
changeps1: false
diff --git a/tests/test_task_parameters.py b/tests/test_task_parameters.py
index b4f444cbf..e06040800 100644
--- a/tests/test_task_parameters.py
+++ b/tests/test_task_parameters.py
@@ -14,6 +14,11 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def test_settings_parameter(self):
+ # check that the default delimiters have not been changed
+ self.assertEqual(SettingsParameter.settings_delimiter, "=")
+ self.assertEqual(SettingsParameter.tuple_delimiter, ";")
+
+ # initialize a SettingsParameter
p = SettingsParameter()
# parsing
@@ -23,6 +28,11 @@ def test_settings_parameter(self):
p.parse("param1=10,param2,param3=text,param4=false"),
{"param1": 10.0, "param2": True, "param3": "text", "param4": False},
)
+ self.assertEqual(
+ # parsing of lists of values, separated via ";"
+ p.parse("param1=1;2;3j;4j,param2=a;b;true;false"),
+ {"param1": (1, 2, 3j, 4j), "param2": ("a", "b", True, False)},
+ )
self.assertEqual(
# if a parameter is set multiple times, prioritize last one
p.parse("A=1,B,A=2"),
@@ -34,8 +44,14 @@ def test_settings_parameter(self):
p.serialize({"param1": 2, "param2": False}),
"param1=2,param2=False",
)
+ print(p.serialize({"param1": [1, 2j, "A", True, False]}))
+ self.assertEqual(
+ p.serialize({"param1": [1, 2j, "A", True, False]}),
+ "param1=1;2j;A;True;False",
+ )
def test_multi_settings_parameter(self):
+ # initialize a MultiSettingsParameter
p = MultiSettingsParameter()
# parsing
@@ -46,6 +62,14 @@ def test_multi_settings_parameter(self):
p.parse("obj1,k1=10,k2,k3=text:obj2,k4=false"),
{"obj1": {"k1": 10.0, "k2": True, "k3": "text"}, "obj2": {"k4": False}},
)
+ self.assertEqual(
+ # parsing of lists of values, separated via ";"
+ p.parse("obj1,k1=1;2;3j;4j,k2=a;b;true;false:obj2,k3=5;6;x;y"),
+ {
+ "obj1": {"k1": (1, 2, 3j, 4j), "k2": ("a", "b", True, False)},
+ "obj2": {"k3": (5, 6, "x", "y")},
+ },
+ )
self.assertEqual(
# providing the same key twice results in once combined dict
p.parse("tt,A=2:st,A=2:tt,B=True"),
diff --git a/tests/test_util.py b/tests/test_util.py
index 82373f9e1..30769c453 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -7,7 +7,7 @@
from columnflow.util import (
create_random_name, maybe_import, MockModule, DotDict, Derivable,
- safe_div, try_float, try_int, is_regex, is_pattern, pattern_matcher,
+ safe_div, try_float, try_int, try_complex, is_regex, is_pattern, pattern_matcher,
)
@@ -44,6 +44,13 @@ def test_try_int_try_float(self):
self.assertFalse(try_number(1j))
self.assertFalse(try_number([1, 2]))
+ def test_try_complex(self):
+ self.assertTrue(try_complex("1.2+2.5j"))
+ self.assertFalse(try_complex("some_string"))
+ self.assertFalse(try_complex([1, 2]))
+ # real numbers are also complex numbers
+ self.assertTrue(try_complex("5.0"))
+
def test_is_regex(self):
self.assertTrue(is_regex(r"^foo\d+.*$"))
self.assertFalse(is_regex(r"^no$atEnd"))