Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
repos:
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: "v0.6.2"
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.13.0"
hooks:
- id: ruff
args: [--fix, --show-fixes, --exit-non-zero-on-fix]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v1.11.2"
rev: "v1.18.1"
hooks:
- id: mypy
args: [--ignore-missing-imports]
additional_dependencies: ["types-requests", "types-PyYAML"]

- repo: https://github.com/adrienverge/yamllint.git
rev: v1.32.0
rev: v1.37.1
hooks:
- id: yamllint
args: ["-d", "{extends: relaxed, rules: {line-length: disable}}"]

- repo: https://github.com/jsh9/pydoclint
rev: 0.6.6
rev: 0.7.3
hooks:
- id: pydoclint
args: [--style=numpy]
1 change: 1 addition & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### [Latest]

- Sync pre-commit hooks and fix Ruff lint failures [#138](https://github.com/umami-hep/umami-preprocessing/pull/138)
- Update docs and CI validation for supported Python versions [#137](https://github.com/umami-hep/umami-preprocessing/pull/137)
- Refresh Umami integration docs and CLI help text [#139](https://github.com/umami-hep/umami-preprocessing/pull/139)
- Avoid expected divide warnings in reweighting weight calculation [#140](https://github.com/umami-hep/umami-preprocessing/pull/140)
Expand Down
12 changes: 6 additions & 6 deletions tests/integration/test_run_rw.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ def _run_split(self):
main(args)
outpath = Path("tmp/upp-tests/integration/temp_workspace/split-components")

assert (
outpath / "organised-components.yaml"
).exists(), "Organised components file not found"
assert (outpath / "organised-components.yaml").exists(), (
"Organised components file not found"
)

for container in ["data1.h5", "data2.h5", "data3.h5"]:
assert (outpath / container).exists()
Expand Down Expand Up @@ -117,9 +117,9 @@ def _rw_merge(self):
assert "jets" in f, "Expected 'jets' group in output file"
print("LOL", f.attrs, f["jets"].attrs, f["jets"].attrs.keys())

assert (
"flavour_label" in f["jets"].attrs
), "Expected 'flavour_label' attribute in 'jets' group of output file"
assert "flavour_label" in f["jets"].attrs, (
"Expected 'flavour_label' attribute in 'jets' group of output file"
)
assert "flavour_label" in f["jets"].dtype.names

def test_rw(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/classes/test_preprocessing_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def test_get_file_name(self) -> None:
# Valid cases
self.assertEqual(
str(config.get_file_name("resampled")),
"/tmp/upp-tests/integration/temp_workspace/" "test_out/pp_output_train.h5",
"/tmp/upp-tests/integration/temp_workspace/test_out/pp_output_train.h5",
)
self.assertEqual(
str(config.get_file_name("resampled_scaled_shuffled")),
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/stages/test_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

class TestClass:
def generate_mock(self, out_file, N=100):
fname, f = get_mock_file(num_jets=N, fname=out_file)
_fname, f = get_mock_file(num_jets=N, fname=out_file)
f.close()

def setup_method(self, method):
Expand Down
6 changes: 3 additions & 3 deletions upp/classes/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,9 +313,9 @@ def from_config(cls, config: PreprocessingConfig) -> Components:
component_list = []
for component in config.config["components"]:
# Ensure equal_jets flag is correctly set
assert (
"equal_jets" not in component
), "equal_jets flag should be set in the sample config"
assert "equal_jets" not in component, (
"equal_jets flag should be set in the sample config"
)

# Get the region cuts
region_cuts = (
Expand Down
6 changes: 3 additions & 3 deletions upp/grid/download_and_prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,9 @@ def create_meta_data(
]

vds = [f / "vds/vds.h5" for f in sel_containers]
assert all(
f.exists() for f in vds
), f"Not all VDS files exist for {split} {flavour}. Found: {vds}"
assert all(f.exists() for f in vds), (
f"Not all VDS files exist for {split} {flavour}. Found: {vds}"
)

files_by_component[split][flavour.name] = [str(f) for f in vds]

Expand Down
14 changes: 7 additions & 7 deletions upp/stages/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@
from upp.stages.resampling import Resampling, safe_divide, select_batch

__all__ = [
"bin_jets",
"Hist",
"create_histograms",
"subdivide_bins",
"upscale_array",
"upscale_array_regionally",
"Merging",
"Normalisation",
"Resampling",
"bin_jets",
"create_histograms",
"make_hist",
"plot_resampling_dists",
"select_batch",
"safe_divide",
"Resampling",
"select_batch",
"subdivide_bins",
"upscale_array",
"upscale_array_regionally",
]
4 changes: 1 addition & 3 deletions upp/stages/merging.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,7 @@ def _is_part_valid(self, sample: str | None, part_idx: int) -> bool:
# All expected datasets that are present should match obs_len
for nm in expected_names:
if nm in f and f[nm].shape[0] != obs_len:
log.warning(
f"Dataset '{nm}' len={f[nm].shape[0]} " f"!= {obs_len} in {fname}"
)
log.warning(f"Dataset '{nm}' len={f[nm].shape[0]} != {obs_len} in {fname}")
return False

# Compare with expected rows for this part (if split mode)
Expand Down
2 changes: 1 addition & 1 deletion upp/stages/resampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ def run_on_region(
):
log.info(
f"{component} usampling ratio is {np.mean(component._ups_ratio):.3f}, with"
f" {component.num_jets/np.mean(component._ups_ratio):,.0f}/"
f" {component.num_jets / np.mean(component._ups_ratio):,.0f}/"
f"{component.num_jets:,} unique jets."
f" Jets are upsampled at most {np.max(component._ups_max):.0f} times"
)
Expand Down
20 changes: 10 additions & 10 deletions upp/stages/reweight.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ def __init__(self, config: PreprocessingConfig):
self.config = config
self.rw_config = config.rw_config
self.flavours = [f.name for f in config.components.flavours]
assert (
self.rw_config is not None
), "Reweighting configuration is not set in the preprocessing config"
assert self.rw_config is not None, (
"Reweighting configuration is not set in the preprocessing config"
)
self.organised_components_config = (
Path(config.base_dir) / "split-components/organised-components.yaml"
)
assert (
self.organised_components_config.exists()
), f"Organised components config file not found: {self.organised_components_config}"
assert self.organised_components_config.exists(), (
f"Organised components config file not found: {self.organised_components_config}"
)

@property
def hists_path(self):
Expand Down Expand Up @@ -90,9 +90,9 @@ def calculate_weights(
print(f"Calculating weights for {len(reweights)} reweights")
readers, per_reader_num_jets = self.get_input_readers()
for reader in readers:
assert (
reader.batch_size == readers[0].batch_size
), "All readers must have the same batch size"
assert reader.batch_size == readers[0].batch_size, (
"All readers must have the same batch size"
)
batch_size_per_file = readers[0].batch_size
all_vars = {}
existing_vars = {}
Expand Down Expand Up @@ -174,7 +174,7 @@ def calculate_weights(

for cls in classes:
mask = data[rw.class_var] == cls
hist, outbins = bin_jets(data[mask][rw.reweight_vars], rw.flat_bins)
hist, _outbins = bin_jets(data[mask][rw.reweight_vars], rw.flat_bins)
if rw.class_var is not None:
cls = str(cls)
if rw_group not in all_histograms:
Expand Down
6 changes: 3 additions & 3 deletions upp/stages/rw_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ def __init__(self, config, outfile_idx_range=None):
self.organised_components_config = (
Path(config.base_dir) / "split-components/organised-components.yaml"
)
assert (
self.organised_components_config.exists()
), f"Organised components config file not found: {self.organised_components_config}"
assert self.organised_components_config.exists(), (
f"Organised components config file not found: {self.organised_components_config}"
)

with open(self.organised_components_config) as f:
organised_components = yaml.safe_load(f)
Expand Down
2 changes: 1 addition & 1 deletion upp/stages/split_containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def split_file(
assert all(
len(_flavour_label_by_component[component]) == 1 for component in sample_components
), f"Each component must have exactly 1 flavour label not {_flavour_label_by_component}"
flavour_label_by_component: dict[str, int] = { # noqa: no-redef
flavour_label_by_component: dict[str, int] = {
component: _flavour_label_by_component[component][0]
for component in _flavour_label_by_component
}
Expand Down
2 changes: 1 addition & 1 deletion upp/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from upp.utils.tools import path_append

__all__ = [
"path_append",
"ProgressBar",
"path_append",
"setup_logger",
]
Loading