From 7c318434e7edf17fdcb15cbade3a7c92ae8d1905 Mon Sep 17 00:00:00 2001 From: Alexander Froch Date: Thu, 21 May 2026 22:49:54 +0200 Subject: [PATCH 1/2] Sync pre-commit hooks and fix lint --- .pre-commit-config.yaml | 10 +++++----- changelog.md | 1 + tests/integration/test_run_rw.py | 12 +++++------ .../unit/classes/test_preprocessing_config.py | 2 +- tests/unit/stages/test_plotting.py | 2 +- upp/classes/components.py | 6 +++--- upp/grid/download_and_prepare.py | 6 +++--- upp/stages/__init__.py | 14 ++++++------- upp/stages/merging.py | 4 +--- upp/stages/resampling.py | 2 +- upp/stages/reweight.py | 20 +++++++++---------- upp/stages/rw_merge.py | 6 +++--- upp/stages/split_containers.py | 2 +- upp/utils/__init__.py | 2 +- 14 files changed, 44 insertions(+), 45 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b1bf81e8..44553364 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,26 +1,26 @@ repos: - - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: "v0.6.2" + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: "v0.13.0" hooks: - id: ruff args: [--fix, --show-fixes, --exit-non-zero-on-fix] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: "v1.11.2" + rev: "v1.18.1" hooks: - id: mypy args: [--ignore-missing-imports] additional_dependencies: ["types-requests", "types-PyYAML"] - repo: https://github.com/adrienverge/yamllint.git - rev: v1.32.0 + rev: v1.37.1 hooks: - id: yamllint args: ["-d", "{extends: relaxed, rules: {line-length: disable}}"] - repo: https://github.com/jsh9/pydoclint - rev: 0.6.6 + rev: 0.7.3 hooks: - id: pydoclint args: [--style=numpy] diff --git a/changelog.md b/changelog.md index 0a9e22af..d6cf0fe1 100644 --- a/changelog.md +++ b/changelog.md @@ -2,6 +2,7 @@ ### [Latest] +- Sync pre-commit hooks and fix Ruff lint failures [#TBD](https://github.com/umami-hep/umami-preprocessing/pull/TBD) - Update documentation [#124](https://github.com/umami-hep/umami-preprocessing/pull/124) - Vectorize `_assign_weights` in rw_merge for ~14x speedup and fix hardcoded "train" in output filenames [#123](https://github.com/umami-hep/umami-preprocessing/pull/123) - Allow reweighting number of jets estimate to be larger than the minority class count [#122](https://github.com/umami-hep/umami-preprocessing/pull/122) diff --git a/tests/integration/test_run_rw.py b/tests/integration/test_run_rw.py index b0ea8b8e..1c07c8c7 100644 --- a/tests/integration/test_run_rw.py +++ b/tests/integration/test_run_rw.py @@ -54,9 +54,9 @@ def _run_split(self): main(args) outpath = Path("tmp/upp-tests/integration/temp_workspace/split-components") - assert ( - outpath / "organised-components.yaml" - ).exists(), "Organised components file not found" + assert (outpath / "organised-components.yaml").exists(), ( + "Organised components file not found" + ) for container in ["data1.h5", "data2.h5", "data3.h5"]: assert (outpath / container).exists() @@ -117,9 +117,9 @@ def _rw_merge(self): assert "jets" in f, "Expected 'jets' group in output file" print("LOL", f.attrs, f["jets"].attrs, f["jets"].attrs.keys()) - assert ( - "flavour_label" in f["jets"].attrs - ), "Expected 'flavour_label' attribute in 'jets' group of output file" + assert "flavour_label" in f["jets"].attrs, ( + "Expected 'flavour_label' attribute in 'jets' group of output file" + ) assert "flavour_label" in f["jets"].dtype.names def test_rw(self): diff --git a/tests/unit/classes/test_preprocessing_config.py b/tests/unit/classes/test_preprocessing_config.py index 8ec2cdd3..0445f650 100644 --- a/tests/unit/classes/test_preprocessing_config.py +++ b/tests/unit/classes/test_preprocessing_config.py @@ -73,7 +73,7 @@ def test_get_file_name(self) -> None: # Valid cases self.assertEqual( str(config.get_file_name("resampled")), - "/tmp/upp-tests/integration/temp_workspace/" "test_out/pp_output_train.h5", + "/tmp/upp-tests/integration/temp_workspace/test_out/pp_output_train.h5", ) self.assertEqual( str(config.get_file_name("resampled_scaled_shuffled")), diff --git a/tests/unit/stages/test_plotting.py b/tests/unit/stages/test_plotting.py index 338aa6d9..71bdd8c1 100644 --- a/tests/unit/stages/test_plotting.py +++ b/tests/unit/stages/test_plotting.py @@ -13,7 +13,7 @@ class TestClass: def generate_mock(self, out_file, N=100): - fname, f = get_mock_file(num_jets=N, fname=out_file) + _fname, f = get_mock_file(num_jets=N, fname=out_file) f.close() def setup_method(self, method): diff --git a/upp/classes/components.py b/upp/classes/components.py index babd90a4..6ab1bbd2 100644 --- a/upp/classes/components.py +++ b/upp/classes/components.py @@ -313,9 +313,9 @@ def from_config(cls, config: PreprocessingConfig) -> Components: component_list = [] for component in config.config["components"]: # Ensure equal_jets flag is correctly set - assert ( - "equal_jets" not in component - ), "equal_jets flag should be set in the sample config" + assert "equal_jets" not in component, ( + "equal_jets flag should be set in the sample config" + ) # Get the region cuts region_cuts = ( diff --git a/upp/grid/download_and_prepare.py b/upp/grid/download_and_prepare.py index 10fa26b4..9abe1b33 100644 --- a/upp/grid/download_and_prepare.py +++ b/upp/grid/download_and_prepare.py @@ -121,9 +121,9 @@ def create_meta_data( ] vds = [f / "vds/vds.h5" for f in sel_containers] - assert all( - f.exists() for f in vds - ), f"Not all VDS files exist for {split} {flavour}. Found: {vds}" + assert all(f.exists() for f in vds), ( + f"Not all VDS files exist for {split} {flavour}. Found: {vds}" + ) files_by_component[split][flavour.name] = [str(f) for f in vds] diff --git a/upp/stages/__init__.py b/upp/stages/__init__.py index 427c52b0..84a76196 100644 --- a/upp/stages/__init__.py +++ b/upp/stages/__init__.py @@ -10,17 +10,17 @@ from upp.stages.resampling import Resampling, safe_divide, select_batch __all__ = [ - "bin_jets", "Hist", - "create_histograms", - "subdivide_bins", - "upscale_array", - "upscale_array_regionally", "Merging", "Normalisation", + "Resampling", + "bin_jets", + "create_histograms", "make_hist", "plot_resampling_dists", - "select_batch", "safe_divide", - "Resampling", + "select_batch", + "subdivide_bins", + "upscale_array", + "upscale_array_regionally", ] diff --git a/upp/stages/merging.py b/upp/stages/merging.py index fc4dfdb3..f82d7e4a 100644 --- a/upp/stages/merging.py +++ b/upp/stages/merging.py @@ -188,9 +188,7 @@ def _is_part_valid(self, sample: str | None, part_idx: int) -> bool: # All expected datasets that are present should match obs_len for nm in expected_names: if nm in f and f[nm].shape[0] != obs_len: - log.warning( - f"Dataset '{nm}' len={f[nm].shape[0]} " f"!= {obs_len} in {fname}" - ) + log.warning(f"Dataset '{nm}' len={f[nm].shape[0]} != {obs_len} in {fname}") return False # Compare with expected rows for this part (if split mode) diff --git a/upp/stages/resampling.py b/upp/stages/resampling.py index ff0f243c..ab7c0a9b 100644 --- a/upp/stages/resampling.py +++ b/upp/stages/resampling.py @@ -380,7 +380,7 @@ def run_on_region( ): log.info( f"{component} usampling ratio is {np.mean(component._ups_ratio):.3f}, with" - f" {component.num_jets/np.mean(component._ups_ratio):,.0f}/" + f" {component.num_jets / np.mean(component._ups_ratio):,.0f}/" f"{component.num_jets:,} unique jets." f" Jets are upsampled at most {np.max(component._ups_max):.0f} times" ) diff --git a/upp/stages/reweight.py b/upp/stages/reweight.py index 0be0e72d..22bcd749 100644 --- a/upp/stages/reweight.py +++ b/upp/stages/reweight.py @@ -20,15 +20,15 @@ def __init__(self, config: PreprocessingConfig): self.config = config self.rw_config = config.rw_config self.flavours = [f.name for f in config.components.flavours] - assert ( - self.rw_config is not None - ), "Reweighting configuration is not set in the preprocessing config" + assert self.rw_config is not None, ( + "Reweighting configuration is not set in the preprocessing config" + ) self.organised_components_config = ( Path(config.base_dir) / "split-components/organised-components.yaml" ) - assert ( - self.organised_components_config.exists() - ), f"Organised components config file not found: {self.organised_components_config}" + assert self.organised_components_config.exists(), ( + f"Organised components config file not found: {self.organised_components_config}" + ) @property def hists_path(self): @@ -90,9 +90,9 @@ def calculate_weights( print(f"Calculating weights for {len(reweights)} reweights") readers, per_reader_num_jets = self.get_input_readers() for reader in readers: - assert ( - reader.batch_size == readers[0].batch_size - ), "All readers must have the same batch size" + assert reader.batch_size == readers[0].batch_size, ( + "All readers must have the same batch size" + ) batch_size_per_file = readers[0].batch_size all_vars = {} existing_vars = {} @@ -174,7 +174,7 @@ def calculate_weights( for cls in classes: mask = data[rw.class_var] == cls - hist, outbins = bin_jets(data[mask][rw.reweight_vars], rw.flat_bins) + hist, _outbins = bin_jets(data[mask][rw.reweight_vars], rw.flat_bins) if rw.class_var is not None: cls = str(cls) if rw_group not in all_histograms: diff --git a/upp/stages/rw_merge.py b/upp/stages/rw_merge.py index 31a7efbe..f2e3a9ed 100644 --- a/upp/stages/rw_merge.py +++ b/upp/stages/rw_merge.py @@ -30,9 +30,9 @@ def __init__(self, config, outfile_idx_range=None): self.organised_components_config = ( Path(config.base_dir) / "split-components/organised-components.yaml" ) - assert ( - self.organised_components_config.exists() - ), f"Organised components config file not found: {self.organised_components_config}" + assert self.organised_components_config.exists(), ( + f"Organised components config file not found: {self.organised_components_config}" + ) with open(self.organised_components_config) as f: organised_components = yaml.safe_load(f) diff --git a/upp/stages/split_containers.py b/upp/stages/split_containers.py index c9b08ab4..efff1938 100644 --- a/upp/stages/split_containers.py +++ b/upp/stages/split_containers.py @@ -189,7 +189,7 @@ def split_file( assert all( len(_flavour_label_by_component[component]) == 1 for component in sample_components ), f"Each component must have exactly 1 flavour label not {_flavour_label_by_component}" - flavour_label_by_component: dict[str, int] = { # noqa: no-redef + flavour_label_by_component: dict[str, int] = { component: _flavour_label_by_component[component][0] for component in _flavour_label_by_component } diff --git a/upp/utils/__init__.py b/upp/utils/__init__.py index e6147a66..cd39bd41 100644 --- a/upp/utils/__init__.py +++ b/upp/utils/__init__.py @@ -6,7 +6,7 @@ from upp.utils.tools import path_append __all__ = [ - "path_append", "ProgressBar", + "path_append", "setup_logger", ] From ff2659e7bc39918fe6a090cec57d2793ce4d800c Mon Sep 17 00:00:00 2001 From: Alexander Froch Date: Thu, 21 May 2026 22:51:15 +0200 Subject: [PATCH 2/2] Update changelog PR reference --- changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index d6cf0fe1..75adbdba 100644 --- a/changelog.md +++ b/changelog.md @@ -2,7 +2,7 @@ ### [Latest] -- Sync pre-commit hooks and fix Ruff lint failures [#TBD](https://github.com/umami-hep/umami-preprocessing/pull/TBD) +- Sync pre-commit hooks and fix Ruff lint failures [#138](https://github.com/umami-hep/umami-preprocessing/pull/138) - Update documentation [#124](https://github.com/umami-hep/umami-preprocessing/pull/124) - Vectorize `_assign_weights` in rw_merge for ~14x speedup and fix hardcoded "train" in output filenames [#123](https://github.com/umami-hep/umami-preprocessing/pull/123) - Allow reweighting number of jets estimate to be larger than the minority class count [#122](https://github.com/umami-hep/umami-preprocessing/pull/122)