From cc6070b03dfafeafe0128d0d5580e9dab6a06373 Mon Sep 17 00:00:00 2001 From: swarnaleem Date: Mon, 27 Apr 2026 16:19:59 +0200 Subject: [PATCH 1/4] latest version resolution for compliance-checker --- esgf_qa/run_qa.py | 78 +++++++++++++++++++++++++++++++------- tests/test_run_dummy_qa.py | 2 +- tests/test_run_qa.py | 10 +++++ 3 files changed, 75 insertions(+), 15 deletions(-) diff --git a/esgf_qa/run_qa.py b/esgf_qa/run_qa.py index 6482064..f1856ce 100644 --- a/esgf_qa/run_qa.py +++ b/esgf_qa/run_qa.py @@ -117,6 +117,33 @@ def get_installed_checker_versions(): return installed_versions +def resolve_latest_version(checker_name, installed_versions): + """ + Resolve 'latest' to the actual latest installed version number. + + Needed because compliance-checker >= 6.0.0 removed the ':latest' alias + from all checkers. esgf-qa still accepts 'latest' as a convenience alias + and maps it to the highest installed version here. + + Parameters + ---------- + checker_name : str + The checker name (e.g. 'cf'). + installed_versions : dict + Mapping returned by ``get_installed_checker_versions()``. + + Returns + ------- + str + The actual latest version string (e.g. '1.11'), or 'latest' if the + version cannot be resolved (e.g. checker not installed). + """ + versions = [v for v in installed_versions.get(checker_name, []) if v != "latest"] + if versions: + return versions[-1] # list is already sorted ascending; last = highest + return "latest" + + def get_checker_release_versions(checkers, checker_options={}): """ Get the release versions of the checkers. @@ -140,20 +167,27 @@ def get_checker_release_versions(checkers, checker_options={}): check_suite = CheckSuite(options=checker_options) check_suite.load_all_available_checkers() for checker in checkers: - if checker.split(":")[0] not in checker_release_versions: - if checker.split(":")[0] in checker_dict: - checker_release_versions[checker.split(":")[0]] = ( - check_suite.checkers.get( - checker, "unknown version" - )._cc_spec_version - ) - elif checker.split(":")[0] in checker_dict_ext: - checker_release_versions[checker.split(":")[0]] = version + checker_name = checker.split(":")[0] + if checker_name not in checker_release_versions: + if checker_name in checker_dict_ext and checker_name not in checker_dict: + # Internal esgf-qa checker (cons, cont, comp) - use esgf-qa version + checker_release_versions[checker_name] = version else: - checker_release_versions[checker.split(":")[0]] = ( - check_suite.checkers.get( - checker, "unknown version" - )._cc_spec_version + # compliance-checker plugin: look up _cc_spec_version. + # CC >= 6.0.0 removed :latest, so fall back to the highest + # explicitly versioned key when the requested key is missing. + checker_obj = check_suite.checkers.get(checker) + if checker_obj is None: + prefix = checker_name + ":" + candidates = [k for k in check_suite.checkers if k.startswith(prefix)] + if candidates: + resolved_key = max( + candidates, + key=lambda k: pversion.parse(k.split(":")[1]), + ) + checker_obj = check_suite.checkers.get(resolved_key) + checker_release_versions[checker_name] = ( + checker_obj._cc_spec_version if checker_obj is not None else "unknown" ) @@ -194,7 +228,7 @@ def run_compliance_checker(file_path, checkers, checker_options={}): if include_checks: results = {} for checker in checkers: - if include_checks and "cc6:latest" in checker or "mip:latest" in checker: + if include_checks and checker.split(":")[0] in ("cc6", "mip"): results.update( check_suite.run_all(ds, [checker], include_checks, skip_checks=[]) ) @@ -737,6 +771,22 @@ def main(): "ERROR: Cannot run both 'cc6' and 'mip' checkers at the same time." ) + # Resolve :latest to the actual highest installed version number. + # compliance-checker >= 6.0.0 removed the :latest alias, so passing + # e.g. 'cf:latest' to CheckSuite.run_all() raises a KeyError there. + # esgf-qa keeps accepting 'latest' from the user and maps it here. + if any(v == "latest" for v in checkers_versions.values()): + _installed = ( + cc_checker_versions + if "cc_checker_versions" in locals() + else get_installed_checker_versions() + ) + for _checker_i in list(checkers_versions.keys()): + if checkers_versions[_checker_i] == "latest": + checkers_versions[_checker_i] = resolve_latest_version( + _checker_i, _installed + ) + # Combine checkers and versions # (checker_options are hardcoded) checkers = sorted([f"{c}:{v}" for c, v in checkers_versions.items()]) diff --git a/tests/test_run_dummy_qa.py b/tests/test_run_dummy_qa.py index 11ed49c..1a954f7 100644 --- a/tests/test_run_dummy_qa.py +++ b/tests/test_run_dummy_qa.py @@ -188,4 +188,4 @@ def test_process_dataset_cached(self, fake_check_suite, tmp_env, dummy_nc_file): ) assert ds_id == ds - assert result == {"cf": {"errors": {}}} + assert result == {"cf": {"errors": {}}} \ No newline at end of file diff --git a/tests/test_run_qa.py b/tests/test_run_qa.py index a7eb43c..aaa6811 100644 --- a/tests/test_run_qa.py +++ b/tests/test_run_qa.py @@ -179,3 +179,13 @@ def test_parse_options(): }, ) assert _verify_options_dict(opt_dict) is True + +from esgf_qa.run_qa import resolve_latest_version + +def test_resolve_latest_version(): + installed = {"cf": ["1.10", "1.11", "latest"]} + assert resolve_latest_version("cf", installed) == "1.11" + # checker not installed -> stays as "latest" + assert resolve_latest_version("unknown", installed) == "latest" + # only one version + assert resolve_latest_version("cf", {"cf": ["1.6", "latest"]}) == "1.6" \ No newline at end of file From fb57ae855eaad501ff13b6c61c421d16bf8258d0 Mon Sep 17 00:00:00 2001 From: sol1105 Date: Thu, 30 Apr 2026 16:55:51 +0200 Subject: [PATCH 2/4] Normalize checker:latest to unversioned checker and rely on compliance-checker to select latest checker version. --- README.md | 2 +- esgf_qa/run_qa.py | 101 ++++++++++++++++++++----------------------- tests/test_run_qa.py | 18 ++++---- 3 files changed, 58 insertions(+), 63 deletions(-) diff --git a/README.md b/README.md index de48da0..ef77633 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ $ esgqa [-h] [-P ] [-o ] [-t ] [-O OPTION] - `-h, --help`: show this help message and exit - `-P, --parallel_processes`: Specify the maximum number of parallel processes. Default: 0 (= number of cores). - `-o, --output_dir OUTPUT_DIR`: Directory to store QA results. Needs to be non-existing or empty or from previous QA run. If not specified, will store results in `./cc-qa-check-results/YYYYMMDD-HHmm_`. - - `-t, --test TEST`: The test to run (eg. `'wcrp_cmip6:latest'`, `'wcrp_cordex_cmip6:latest'` or `'cf:'`, can be specified multiple times, eg.: `'-t wcrp_cmip6:latest -t cf:1.7'`) - default: running latest CF checks `'cf:latest'`. If the version is omitted, `latest` will be used. + - `-t, --test TEST`: The test to run (eg. `'wcrp_cmip6:latest'`, `'wcrp_cordex_cmip6:latest'` or `'cf:'`, can be specified multiple times, eg.: `'-t wcrp_cmip6:latest -t cf:1.7'`) - default: running latest CF checks. If the version is omitted, `latest` will be used (`'cf'` and `'cf:latest'` are equivalent). - `-O, --option OPTION`: Additional options to be passed to the checkers. Format: `':[:]'`. Multiple invocations possible. - `-i, --info INFO`: Information used to tag the QA results, eg. the simulation id to identify the checked run. Suggested is the original experiment-id you gave the run. - `-r, --resume`: Specify to continue a previous QC run. Requires the `` argument to be set. diff --git a/esgf_qa/run_qa.py b/esgf_qa/run_qa.py index f1856ce..e0c3e04 100644 --- a/esgf_qa/run_qa.py +++ b/esgf_qa/run_qa.py @@ -117,33 +117,6 @@ def get_installed_checker_versions(): return installed_versions -def resolve_latest_version(checker_name, installed_versions): - """ - Resolve 'latest' to the actual latest installed version number. - - Needed because compliance-checker >= 6.0.0 removed the ':latest' alias - from all checkers. esgf-qa still accepts 'latest' as a convenience alias - and maps it to the highest installed version here. - - Parameters - ---------- - checker_name : str - The checker name (e.g. 'cf'). - installed_versions : dict - Mapping returned by ``get_installed_checker_versions()``. - - Returns - ------- - str - The actual latest version string (e.g. '1.11'), or 'latest' if the - version cannot be resolved (e.g. checker not installed). - """ - versions = [v for v in installed_versions.get(checker_name, []) if v != "latest"] - if versions: - return versions[-1] # list is already sorted ascending; last = highest - return "latest" - - def get_checker_release_versions(checkers, checker_options={}): """ Get the release versions of the checkers. @@ -179,7 +152,9 @@ def get_checker_release_versions(checkers, checker_options={}): checker_obj = check_suite.checkers.get(checker) if checker_obj is None: prefix = checker_name + ":" - candidates = [k for k in check_suite.checkers if k.startswith(prefix)] + candidates = [ + k for k in check_suite.checkers if k.startswith(prefix) + ] if candidates: resolved_key = max( candidates, @@ -187,10 +162,35 @@ def get_checker_release_versions(checkers, checker_options={}): ) checker_obj = check_suite.checkers.get(resolved_key) checker_release_versions[checker_name] = ( - checker_obj._cc_spec_version if checker_obj is not None else "unknown" + checker_obj._cc_spec_version + if checker_obj is not None + else "unknown" ) +def normalize_checker_specs(checkers_versions): + """ + Normalize checker specifications for compliance-checker. + + Parameters + ---------- + checkers_versions : dict + Mapping of checker name to requested version string. + + Returns + ------- + list + Sorted checker specs where explicit versions are kept as + ':' and 'latest' maps to unversioned ''. + """ + return sorted( + [ + checker if checker_version == "latest" else f"{checker}:{checker_version}" + for checker, checker_version in checkers_versions.items() + ] + ) + + def run_compliance_checker(file_path, checkers, checker_options={}): """ Run the compliance checker on a file with the specified checkers and options. @@ -308,7 +308,10 @@ def process_file( and os.path.isfile(result_file) and ( os.path.isfile(consistency_file) - or not any(cn.startswith("cc6") or cn.startswith("mip") for cn in checkers) + or not any( + cn.split(":", 1)[0] in checker_supporting_consistency_checks + for cn in checkers + ) ) ): with open(result_file) as file: @@ -771,25 +774,11 @@ def main(): "ERROR: Cannot run both 'cc6' and 'mip' checkers at the same time." ) - # Resolve :latest to the actual highest installed version number. - # compliance-checker >= 6.0.0 removed the :latest alias, so passing - # e.g. 'cf:latest' to CheckSuite.run_all() raises a KeyError there. - # esgf-qa keeps accepting 'latest' from the user and maps it here. - if any(v == "latest" for v in checkers_versions.values()): - _installed = ( - cc_checker_versions - if "cc_checker_versions" in locals() - else get_installed_checker_versions() - ) - for _checker_i in list(checkers_versions.keys()): - if checkers_versions[_checker_i] == "latest": - checkers_versions[_checker_i] = resolve_latest_version( - _checker_i, _installed - ) - - # Combine checkers and versions - # (checker_options are hardcoded) - checkers = sorted([f"{c}:{v}" for c, v in checkers_versions.items()]) + # Normalize checker specifications for compliance-checker: + # - explicit versions are forwarded as ':' + # - omitted versions and ':latest' are both forwarded as '' + # so compliance-checker selects the highest installed version. + checkers = normalize_checker_specs(checkers_versions) # Does parent_dir exist? if parent_dir is None: @@ -810,13 +799,17 @@ def main(): with open(os.path.join(result_dir, ".resume_info"), "w") as f: json.dump(resume_info, f, sort_keys=True, indent=4) - # If only cf checker is selected, run cc6 time checks only + # If none of the selected checkers support consistency checks, + # add mip time checks so consistency output can be generated. if ( - not any(cn.startswith("cc6") or cn.startswith("mip") for cn in checkers) + not any( + cn.split(":", 1)[0] in checker_supporting_consistency_checks + for cn in checkers + ) and include_consistency_checks ): time_checks_only = True - checkers.append("mip:latest") + checkers.append("mip") checkers.sort() else: time_checks_only = False @@ -1076,7 +1069,7 @@ def main(): ) del result - # Skip continuity and consistency checks if no cc6/mip checks were run + # Skip continuity and consistency checks if no appropriate checkers were run # (and thus no consistency output file was created) if any( ch.split(":", 1)[0] in checker_supporting_consistency_checks for ch in checkers @@ -1170,7 +1163,7 @@ def main(): print() print("#" * 50) print( - f"# QA Part {'3' if 'cc6:latest' in checkers or 'mip:latest' in checkers else '2'} - Summarizing and clustering the results" + f"# QA Part {'3' if any(cn.split(':')[0] in checker_supporting_consistency_checks for cn in checkers) else '2'} - Summarizing and clustering the results" ) print("#" * 50) print() diff --git a/tests/test_run_qa.py b/tests/test_run_qa.py index aaa6811..a08cc66 100644 --- a/tests/test_run_qa.py +++ b/tests/test_run_qa.py @@ -13,6 +13,7 @@ get_checker_release_versions, get_default_result_dir, get_dsid, + normalize_checker_specs, parse_options, track_checked_datasets, ) @@ -180,12 +181,13 @@ def test_parse_options(): ) assert _verify_options_dict(opt_dict) is True -from esgf_qa.run_qa import resolve_latest_version -def test_resolve_latest_version(): - installed = {"cf": ["1.10", "1.11", "latest"]} - assert resolve_latest_version("cf", installed) == "1.11" - # checker not installed -> stays as "latest" - assert resolve_latest_version("unknown", installed) == "latest" - # only one version - assert resolve_latest_version("cf", {"cf": ["1.6", "latest"]}) == "1.6" \ No newline at end of file +def test_latest_and_omitted_versions_are_equivalent_in_internal_specs(): + checkers_versions = {"cf": "latest", "cc6": "latest", "wcrp_cmip6": "1.7"} + checkers = normalize_checker_specs(checkers_versions) + + assert "cf" in checkers + assert "cc6" in checkers + assert "wcrp_cmip6:1.7" in checkers + assert "cf:latest" not in checkers + assert "cc6:latest" not in checkers From b059c56d0e6a40984eed3447c73d196d30f22249 Mon Sep 17 00:00:00 2001 From: sol1105 Date: Thu, 30 Apr 2026 16:56:55 +0200 Subject: [PATCH 3/4] Update pyproject.toml --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2106e44..581eb0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,15 +29,15 @@ classifiers = [ dependencies = [ "cftime", "cf_xarray", - "compliance-checker>=5.3.0", + "compliance-checker>=6.0.0", "dask", "netCDF4", "packaging", "pandas", "textual", "xarray", - "cc-plugin-cc6>=0.4.0", - "cc-plugin-wcrp" + "cc-plugin-cc6>=0.4.3", + "cc-plugin-wcrp>=2.1.0" ] dynamic = [ "version" From 2dffbbcb181998d5e73427176ee18cc5e597d2df Mon Sep 17 00:00:00 2001 From: sol1105 <10836031+sol1105@users.noreply.github.com> Date: Thu, 30 Apr 2026 17:14:20 +0200 Subject: [PATCH 4/4] pre-commit fixes --- tests/test_run_dummy_qa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_run_dummy_qa.py b/tests/test_run_dummy_qa.py index 1a954f7..11ed49c 100644 --- a/tests/test_run_dummy_qa.py +++ b/tests/test_run_dummy_qa.py @@ -188,4 +188,4 @@ def test_process_dataset_cached(self, fake_check_suite, tmp_env, dummy_nc_file): ) assert ds_id == ds - assert result == {"cf": {"errors": {}}} \ No newline at end of file + assert result == {"cf": {"errors": {}}}