Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 22 additions & 16 deletions hca/staging_area_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,31 +209,37 @@ def validate_metadata_file(self, blob: gcs.Blob) -> None:
metadata_id, metadata_version = metadata_file[:-5].split("_")
file_json = self.download_blob_as_json(blob)
self.validate_file_json(file_json, blob.name)
if provenance := file_json.get("provenance"):
assert metadata_id == provenance["document_id"]
provenance = file_json["provenance"]
assert metadata_id == provenance["document_id"]
if metadata_file := self.metadata_files.get(metadata_id):
metadata_file["name"].add(blob.name)
metadata_file["metadata_versions"].add(metadata_version)
metadata_file["found_metadata"] = True
if metadata_type.endswith("_file"):
metadata_file["data_file_name"] = file_json["file_core"]["file_name"]
metadata_file["found_data_file"] = False
if metadata_type == "supplementary_file" and file_json.get(
"provenance", {}
).get("submitter_id"):
try:
self.validate_file_description(file_json.get("file_description"))
except Exception as e:
self.file_errors[blob.name] = e
metadata_file["valid_stratification"] = False
log.error("Invalid file_description in %s.", blob.name)
else:
metadata_file["valid_stratification"] = True
if metadata_type == "supplementary_file" and "submitter_id" in provenance:
content_description = file_json["file_core"].get(
"content_description", []
)
if any(
"matrix" in v.lower()
for cd in content_description
for v in cd.values()
):
try:
self.validate_stratification(file_json.get("file_description"))
except Exception as e:
self.file_errors[blob.name] = e
metadata_file["valid_stratification"] = False
log.error("Invalid file_description in %s.", blob.name)
else:
metadata_file["valid_stratification"] = True
else:
self.extra_files.append(blob.name)

def validate_file_description(self, file_description: str) -> None:
if not file_description:
def validate_stratification(self, stratification: str) -> None:
if not stratification:
return
strata = [
{
Expand All @@ -242,7 +248,7 @@ def validate_file_description(self, file_description: str) -> None:
point.split("=") for point in stratum.split(";")
)
}
for stratum in file_description.split("\n")
for stratum in stratification.split("\n")
]
log.debug(strata)
valid_keys = [
Expand Down