diff --git a/README.md b/README.md index de48da0..ef77633 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ $ esgqa [-h] [-P ] [-o ] [-t ] [-O OPTION] - `-h, --help`: show this help message and exit - `-P, --parallel_processes`: Specify the maximum number of parallel processes. Default: 0 (= number of cores). - `-o, --output_dir OUTPUT_DIR`: Directory to store QA results. Needs to be non-existing or empty or from previous QA run. If not specified, will store results in `./cc-qa-check-results/YYYYMMDD-HHmm_`. - - `-t, --test TEST`: The test to run (eg. `'wcrp_cmip6:latest'`, `'wcrp_cordex_cmip6:latest'` or `'cf:'`, can be specified multiple times, eg.: `'-t wcrp_cmip6:latest -t cf:1.7'`) - default: running latest CF checks `'cf:latest'`. If the version is omitted, `latest` will be used. + - `-t, --test TEST`: The test to run (eg. `'wcrp_cmip6:latest'`, `'wcrp_cordex_cmip6:latest'` or `'cf:'`, can be specified multiple times, eg.: `'-t wcrp_cmip6:latest -t cf:1.7'`) - default: running latest CF checks. If the version is omitted, `latest` will be used (`'cf'` and `'cf:latest'` are equivalent). - `-O, --option OPTION`: Additional options to be passed to the checkers. Format: `':[:]'`. Multiple invocations possible. - `-i, --info INFO`: Information used to tag the QA results, eg. the simulation id to identify the checked run. Suggested is the original experiment-id you gave the run. - `-r, --resume`: Specify to continue a previous QC run. Requires the `` argument to be set. 
diff --git a/esgf_qa/run_qa.py b/esgf_qa/run_qa.py index 6482064..e0c3e04 100644 --- a/esgf_qa/run_qa.py +++ b/esgf_qa/run_qa.py @@ -140,23 +140,57 @@ def get_checker_release_versions(checkers, checker_options={}): check_suite = CheckSuite(options=checker_options) check_suite.load_all_available_checkers() for checker in checkers: - if checker.split(":")[0] not in checker_release_versions: - if checker.split(":")[0] in checker_dict: - checker_release_versions[checker.split(":")[0]] = ( - check_suite.checkers.get( - checker, "unknown version" - )._cc_spec_version - ) - elif checker.split(":")[0] in checker_dict_ext: - checker_release_versions[checker.split(":")[0]] = version + checker_name = checker.split(":")[0] + if checker_name not in checker_release_versions: + if checker_name in checker_dict_ext and checker_name not in checker_dict: + # Internal esgf-qa checker (cons, cont, comp) - use esgf-qa version + checker_release_versions[checker_name] = version else: - checker_release_versions[checker.split(":")[0]] = ( - check_suite.checkers.get( - checker, "unknown version" - )._cc_spec_version + # compliance-checker plugin: look up _cc_spec_version. + # CC >= 6.0.0 removed :latest, so fall back to the highest + # explicitly versioned key when the requested key is missing. + checker_obj = check_suite.checkers.get(checker) + if checker_obj is None: + prefix = checker_name + ":" + candidates = [ + k for k in check_suite.checkers if k.startswith(prefix) + ] + if candidates: + resolved_key = max( + candidates, + key=lambda k: pversion.parse(k.split(":")[1]), + ) + checker_obj = check_suite.checkers.get(resolved_key) + checker_release_versions[checker_name] = ( + checker_obj._cc_spec_version + if checker_obj is not None + else "unknown" ) +def normalize_checker_specs(checkers_versions): + """ + Normalize checker specifications for compliance-checker. + + Parameters + ---------- + checkers_versions : dict + Mapping of checker name to requested version string. 
+ + Returns + ------- + list + Sorted checker specs where explicit versions are kept as + '<checker>:<version>' and 'latest' maps to unversioned '<checker>'. + """ + return sorted( + [ + checker if checker_version == "latest" else f"{checker}:{checker_version}" + for checker, checker_version in checkers_versions.items() + ] + ) + + def run_compliance_checker(file_path, checkers, checker_options={}): """ Run the compliance checker on a file with the specified checkers and options. @@ -194,7 +228,7 @@ def run_compliance_checker(file_path, checkers, checker_options={}): if include_checks: results = {} for checker in checkers: - if include_checks and "cc6:latest" in checker or "mip:latest" in checker: + if include_checks and checker.split(":")[0] in ("cc6", "mip"): results.update( check_suite.run_all(ds, [checker], include_checks, skip_checks=[]) ) @@ -274,7 +308,10 @@ def process_file( and os.path.isfile(result_file) and ( os.path.isfile(consistency_file) - or not any(cn.startswith("cc6") or cn.startswith("mip") for cn in checkers) + or not any( + cn.split(":", 1)[0] in checker_supporting_consistency_checks + for cn in checkers + ) ) ): with open(result_file) as file: @@ -737,9 +774,11 @@ def main(): "ERROR: Cannot run both 'cc6' and 'mip' checkers at the same time." ) - # Combine checkers and versions - # (checker_options are hardcoded) - checkers = sorted([f"{c}:{v}" for c, v in checkers_versions.items()]) + # Normalize checker specifications for compliance-checker: + # - explicit versions are forwarded as '<checker>:<version>' + # - omitted versions and ':latest' are both forwarded as '<checker>' + # so compliance-checker selects the highest installed version. + checkers = normalize_checker_specs(checkers_versions) # Does parent_dir exist? 
if parent_dir is None: @@ -760,13 +799,17 @@ def main(): with open(os.path.join(result_dir, ".resume_info"), "w") as f: json.dump(resume_info, f, sort_keys=True, indent=4) - # If only cf checker is selected, run cc6 time checks only + # If none of the selected checkers support consistency checks, + # add mip time checks so consistency output can be generated. if ( - not any(cn.startswith("cc6") or cn.startswith("mip") for cn in checkers) + not any( + cn.split(":", 1)[0] in checker_supporting_consistency_checks + for cn in checkers + ) and include_consistency_checks ): time_checks_only = True - checkers.append("mip:latest") + checkers.append("mip") checkers.sort() else: time_checks_only = False @@ -1026,7 +1069,7 @@ def main(): ) del result - # Skip continuity and consistency checks if no cc6/mip checks were run + # Skip continuity and consistency checks if no appropriate checkers were run # (and thus no consistency output file was created) if any( ch.split(":", 1)[0] in checker_supporting_consistency_checks for ch in checkers @@ -1120,7 +1163,7 @@ def main(): print() print("#" * 50) print( - f"# QA Part {'3' if 'cc6:latest' in checkers or 'mip:latest' in checkers else '2'} - Summarizing and clustering the results" + f"# QA Part {'3' if any(cn.split(':')[0] in checker_supporting_consistency_checks for cn in checkers) else '2'} - Summarizing and clustering the results" ) print("#" * 50) print() diff --git a/pyproject.toml b/pyproject.toml index 2106e44..581eb0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,15 +29,15 @@ classifiers = [ dependencies = [ "cftime", "cf_xarray", - "compliance-checker>=5.3.0", + "compliance-checker>=6.0.0", "dask", "netCDF4", "packaging", "pandas", "textual", "xarray", - "cc-plugin-cc6>=0.4.0", - "cc-plugin-wcrp" + "cc-plugin-cc6>=0.4.3", + "cc-plugin-wcrp>=2.1.0" ] dynamic = [ "version" diff --git a/tests/test_run_qa.py b/tests/test_run_qa.py index a7eb43c..a08cc66 100644 --- a/tests/test_run_qa.py +++ b/tests/test_run_qa.py @@ 
-13,6 +13,7 @@ get_checker_release_versions, get_default_result_dir, get_dsid, + normalize_checker_specs, parse_options, track_checked_datasets, ) @@ -179,3 +180,14 @@ def test_parse_options(): }, ) assert _verify_options_dict(opt_dict) is True + + +def test_latest_and_omitted_versions_are_equivalent_in_internal_specs(): + checkers_versions = {"cf": "latest", "cc6": "latest", "wcrp_cmip6": "1.7"} + checkers = normalize_checker_specs(checkers_versions) + + assert "cf" in checkers + assert "cc6" in checkers + assert "wcrp_cmip6:1.7" in checkers + assert "cf:latest" not in checkers + assert "cc6:latest" not in checkers