Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ $ esgqa [-h] [-P <parallel_processes>] [-o <OUTPUT_DIR>] [-t <TEST>] [-O OPTION]
- `-h, --help`: show this help message and exit
- `-P, --parallel_processes`: Specify the maximum number of parallel processes. Default: 0 (= number of cores).
- `-o, --output_dir OUTPUT_DIR`: Directory to store QA results. Must be non-existent, empty, or the output directory of a previous QA run. If not specified, results are stored in `./cc-qa-check-results/YYYYMMDD-HHmm_<hash>`.
- `-t, --test TEST`: The test to run (eg. `'wcrp_cmip6:latest'`, `'wcrp_cordex_cmip6:latest'` or `'cf:<version>'`, can be specified multiple times, eg.: `'-t wcrp_cmip6:latest -t cf:1.7'`) - default: running latest CF checks `'cf:latest'`. If the version is omitted, `latest` will be used.
- `-t, --test TEST`: The test to run (e.g. `'wcrp_cmip6:latest'`, `'wcrp_cordex_cmip6:latest'` or `'cf:<version>'`). Can be specified multiple times, e.g. `'-t wcrp_cmip6:latest -t cf:1.7'`. Default: the latest CF checks. If the version is omitted, `latest` is used (`'cf'` and `'cf:latest'` are equivalent).
- `-O, --option OPTION`: Additional options to be passed to the checkers. Format: `'<checker>:<option_name>[:<option_value>]'`. Multiple invocations possible.
- `-i, --info INFO`: Information used to tag the QA results, eg. the simulation id to identify the checked run. Suggested is the original experiment-id you gave the run.
- `-r, --resume`: Specify to continue a previous QA run. Requires the `<output_dir>` argument to be set.
Expand Down
89 changes: 66 additions & 23 deletions esgf_qa/run_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,23 +140,57 @@ def get_checker_release_versions(checkers, checker_options={}):
check_suite = CheckSuite(options=checker_options)
check_suite.load_all_available_checkers()
for checker in checkers:
if checker.split(":")[0] not in checker_release_versions:
if checker.split(":")[0] in checker_dict:
checker_release_versions[checker.split(":")[0]] = (
check_suite.checkers.get(
checker, "unknown version"
)._cc_spec_version
)
elif checker.split(":")[0] in checker_dict_ext:
checker_release_versions[checker.split(":")[0]] = version
checker_name = checker.split(":")[0]
if checker_name not in checker_release_versions:
if checker_name in checker_dict_ext and checker_name not in checker_dict:
# Internal esgf-qa checker (cons, cont, comp) - use esgf-qa version
checker_release_versions[checker_name] = version
else:
checker_release_versions[checker.split(":")[0]] = (
check_suite.checkers.get(
checker, "unknown version"
)._cc_spec_version
# compliance-checker plugin: look up _cc_spec_version.
# CC >= 6.0.0 removed :latest, so fall back to the highest
# explicitly versioned key when the requested key is missing.
checker_obj = check_suite.checkers.get(checker)
if checker_obj is None:
prefix = checker_name + ":"
candidates = [
k for k in check_suite.checkers if k.startswith(prefix)
]
if candidates:
resolved_key = max(
candidates,
key=lambda k: pversion.parse(k.split(":")[1]),
)
checker_obj = check_suite.checkers.get(resolved_key)
checker_release_versions[checker_name] = (
checker_obj._cc_spec_version
if checker_obj is not None
else "unknown"
)


def normalize_checker_specs(checkers_versions):
    """
    Normalize checker specifications for compliance-checker.

    Parameters
    ----------
    checkers_versions : dict
        Mapping of checker name to requested version string.

    Returns
    -------
    list
        Sorted checker specs where explicit versions are kept as
        '<checker>:<version>' and 'latest' (or a missing/empty version)
        maps to unversioned '<checker>'.
    """
    specs = []
    for checker, checker_version in checkers_versions.items():
        # An omitted version (None / empty string) is handled like 'latest':
        # the bare checker name is emitted instead of a malformed
        # '<checker>:' or '<checker>:None' spec.
        if not checker_version or checker_version == "latest":
            specs.append(checker)
        else:
            specs.append(f"{checker}:{checker_version}")
    # Sorting keeps the resulting checker list deterministic regardless of
    # the insertion order of the input mapping.
    return sorted(specs)


def run_compliance_checker(file_path, checkers, checker_options={}):
"""
Run the compliance checker on a file with the specified checkers and options.
Expand Down Expand Up @@ -194,7 +228,7 @@ def run_compliance_checker(file_path, checkers, checker_options={}):
if include_checks:
results = {}
for checker in checkers:
if include_checks and "cc6:latest" in checker or "mip:latest" in checker:
if include_checks and checker.split(":")[0] in ("cc6", "mip"):
results.update(
check_suite.run_all(ds, [checker], include_checks, skip_checks=[])
)
Expand Down Expand Up @@ -274,7 +308,10 @@ def process_file(
and os.path.isfile(result_file)
and (
os.path.isfile(consistency_file)
or not any(cn.startswith("cc6") or cn.startswith("mip") for cn in checkers)
or not any(
cn.split(":", 1)[0] in checker_supporting_consistency_checks
for cn in checkers
)
)
):
with open(result_file) as file:
Expand Down Expand Up @@ -737,9 +774,11 @@ def main():
"ERROR: Cannot run both 'cc6' and 'mip' checkers at the same time."
)

# Combine checkers and versions
# (checker_options are hardcoded)
checkers = sorted([f"{c}:{v}" for c, v in checkers_versions.items()])
# Normalize checker specifications for compliance-checker:
# - explicit versions are forwarded as '<checker>:<version>'
# - omitted versions and ':latest' are both forwarded as '<checker>'
# so compliance-checker selects the highest installed version.
checkers = normalize_checker_specs(checkers_versions)

# Does parent_dir exist?
if parent_dir is None:
Expand All @@ -760,13 +799,17 @@ def main():
with open(os.path.join(result_dir, ".resume_info"), "w") as f:
json.dump(resume_info, f, sort_keys=True, indent=4)

# If only cf checker is selected, run cc6 time checks only
# If none of the selected checkers support consistency checks,
# add mip time checks so consistency output can be generated.
if (
not any(cn.startswith("cc6") or cn.startswith("mip") for cn in checkers)
not any(
cn.split(":", 1)[0] in checker_supporting_consistency_checks
for cn in checkers
)
and include_consistency_checks
):
time_checks_only = True
checkers.append("mip:latest")
checkers.append("mip")
checkers.sort()
else:
time_checks_only = False
Expand Down Expand Up @@ -1026,7 +1069,7 @@ def main():
)
del result

# Skip continuity and consistency checks if no cc6/mip checks were run
# Skip continuity and consistency checks if no appropriate checkers were run
# (and thus no consistency output file was created)
if any(
ch.split(":", 1)[0] in checker_supporting_consistency_checks for ch in checkers
Expand Down Expand Up @@ -1120,7 +1163,7 @@ def main():
print()
print("#" * 50)
print(
f"# QA Part {'3' if 'cc6:latest' in checkers or 'mip:latest' in checkers else '2'} - Summarizing and clustering the results"
f"# QA Part {'3' if any(cn.split(':')[0] in checker_supporting_consistency_checks for cn in checkers) else '2'} - Summarizing and clustering the results"
)
print("#" * 50)
print()
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ classifiers = [
dependencies = [
"cftime",
"cf_xarray",
"compliance-checker>=5.3.0",
"compliance-checker>=6.0.0",
"dask",
"netCDF4",
"packaging",
"pandas",
"textual",
"xarray",
"cc-plugin-cc6>=0.4.0",
"cc-plugin-wcrp"
"cc-plugin-cc6>=0.4.3",
"cc-plugin-wcrp>=2.1.0"
]
dynamic = [
"version"
Expand Down
12 changes: 12 additions & 0 deletions tests/test_run_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
get_checker_release_versions,
get_default_result_dir,
get_dsid,
normalize_checker_specs,
parse_options,
track_checked_datasets,
)
Expand Down Expand Up @@ -179,3 +180,14 @@ def test_parse_options():
},
)
assert _verify_options_dict(opt_dict) is True


def test_latest_and_omitted_versions_are_equivalent_in_internal_specs():
    """Check that 'latest' is forwarded unversioned and explicit versions are kept."""
    checkers_versions = {"cf": "latest", "cc6": "latest", "wcrp_cmip6": "1.7"}
    checkers = normalize_checker_specs(checkers_versions)

    # Pin the exact list so the documented sorted order is verified too,
    # not only the membership of the individual specs.
    assert checkers == ["cc6", "cf", "wcrp_cmip6:1.7"]
    assert "cf:latest" not in checkers
    assert "cc6:latest" not in checkers
Loading