diff --git a/hca/staging_area_validator.py b/hca/staging_area_validator.py index eee0a0c..48d8f4a 100644 --- a/hca/staging_area_validator.py +++ b/hca/staging_area_validator.py @@ -209,8 +209,8 @@ def validate_metadata_file(self, blob: gcs.Blob) -> None: metadata_id, metadata_version = metadata_file[:-5].split("_") file_json = self.download_blob_as_json(blob) self.validate_file_json(file_json, blob.name) - if provenance := file_json.get("provenance"): - assert metadata_id == provenance["document_id"] + provenance = file_json["provenance"] + assert metadata_id == provenance["document_id"] if metadata_file := self.metadata_files.get(metadata_id): metadata_file["name"].add(blob.name) metadata_file["metadata_versions"].add(metadata_version) @@ -218,22 +218,28 @@ def validate_metadata_file(self, blob: gcs.Blob) -> None: if metadata_type.endswith("_file"): metadata_file["data_file_name"] = file_json["file_core"]["file_name"] metadata_file["found_data_file"] = False - if metadata_type == "supplementary_file" and file_json.get( - "provenance", {} - ).get("submitter_id"): - try: - self.validate_file_description(file_json.get("file_description")) - except Exception as e: - self.file_errors[blob.name] = e - metadata_file["valid_stratification"] = False - log.error("Invalid file_description in %s.", blob.name) - else: - metadata_file["valid_stratification"] = True + if metadata_type == "supplementary_file" and "submitter_id" in provenance: + content_description = file_json["file_core"].get( + "content_description", [] + ) + if any( + "matrix" in v.lower() + for cd in content_description + for v in cd.values() + ): + try: + self.validate_stratification(file_json.get("file_description")) + except Exception as e: + self.file_errors[blob.name] = e + metadata_file["valid_stratification"] = False + log.error("Invalid file_description in %s.", blob.name) + else: + metadata_file["valid_stratification"] = True else: self.extra_files.append(blob.name) - def validate_file_description(self, file_description: str) -> None: - if not file_description: + def validate_stratification(self, stratification: str) -> None: + if not stratification: return strata = [ { @@ -242,7 +248,7 @@ def validate_file_description(self, file_description: str) -> None: point.split("=") for point in stratum.split(";") ) } - for stratum in file_description.split("\n") + for stratum in stratification.split("\n") ] log.debug(strata) valid_keys = [