diff --git a/src/include_access_model/schema/include_access_model.yaml b/src/include_access_model/schema/include_access_model.yaml index 5d22101..35a50c0 100644 --- a/src/include_access_model/schema/include_access_model.yaml +++ b/src/include_access_model/schema/include_access_model.yaml @@ -308,39 +308,105 @@ classes: identifier: true File: title: File - description: File + # subset of info; ideally pulls from a file universe table and some kind of assay table + description: Required information for portal use. is_a: Record slots: - - file_id - - subject_id + - study_id + description: unique global study identifier file belongs to + - file_id + description: unique file identifier assigned to a file + - subject_id # do we need both a subject and sample id in this table? - sample_id - - filename + - s3_file_path # can name this url if more appropriate + description: full s3 url of a file's location in aws + - file_name + - size - format - - data_category + - data_category - data_type - format - size - #TODO: I'm not convinced this is the right strategy- access model vs operations +#TODO: I'm not convinced this is the right strategy- access model vs operations - staging_url - release_url - drs_uri - - hash + - is_released # maybe can leave this out? should only release ready data be in this model? + description: notes whether a file has been released to the public + - is_registered # maybe can leave this out? should only drs registered data be in this model? 
+ description: notes whether a file has been registered to a drs service + - repository + description: name of drs service files are registered to + - hash + description: file hash value + slot_usage: + study_id: + range: string + required: true + identifier: true file_id: range: string required: true identifier: true subject_id: - multivalued: true + multivalued: true sample_id: multivalued: true + file_name: + range: string + required: true + format: + range: string + required: true + size: + range: integer + required: true + s3_file_path: + range: string + required: true + hash: + range: + required: true + access_url: + range: string + required: false + drs_uri: + range: string + required: false + data_category: + range: string + required: true + data_type: + range: string + required: true + experimental_strategy: + range: string + required: true + access_type: + range: string + required: true + is_released: + range: boolean + required: true + is_registered: + range: boolean + required: true + repository: + range: string + required: true + access_url: + range: string + required: true + FileHash: title: File Hash description: Type and value of a file content hash. slots: - hash_type - hash_value - Dataset: + +Dataset: title: Dataset description: Set of files grouped together for release. slots: @@ -363,7 +429,230 @@ classes: multivalued: true description: The list of files comprising this dataset. +FileAdmin: # names are TBD; can change - idea is this is operational or file universe model + # probably doesn't go into the "access model" - but here is what could go there + title: File Admin + description: File universe; contains all information about a file that may be needed for operational work + slots: + - study_id + description: unique global study identifier file belongs to + - file_id + description: unique file identifier assigned to a file + - subject_id # do we need both a subject and sample id in this table? 
+ - sample_id + - s3_file_path # can name this url if more appropriate + description: full s3 url of a file's location in aws + - file_category + description: a high level classification of the file (e.g., omics file, imaging file) + - size + - s3_key + - file_extension + - data_transfer_id + description: jira ticket number associated with a file transfer request to production bucket + - aws_account_id + - account_name + - account_alias + - bucket_study_id + description: global study ID used to create the bucket + - bucket + - s3_created_at + - s3_modified_at + - intelligent_tiering_access + - is_delete_marker + description: notes whether a file has been deleted from s3 + - is_latest + - storage_class + - manifest_hash_value + - file_hash_validation_status + - file_type + - encryption_status + - is_multipart_uploaded + - object_lock_legal_hold_status + - object_lock_mode + - object_lock_retain_until_date + - replication_status + - version_id + - staging_url + description: s3 location of a file before it's made public + - release_url + description: production location of a publicly available file + - hash + description: file hash value + - access_type + description: notes whether a file is controlled, open, or registered-tier access + - access_url + description: + - drs_uri + - acl + description: access control list for the file + - is_released + description: notes whether a file has been released to the public + - is_registered + description: notes whether a file has been registered to a drs service + - repository + description: name of drs service files are registered to + - experimental_strategy + + slot_usage: + study_id: + range: string + required: true + identifier: true + file_id: + range: string + required: true + identifier: true + file_category: + range: string + required: true + s3_key: + range: string + required: true + file_extension: + range: string + required: true + data_transfer_id: + range: string + required: false + aws_account_id: + range: string + 
required: true + account_name: + range: string + required: true + account_alias: + range: string + required: true + bucket_study_id: + range: string + required: false + bucket: + range: string + required: false + s3_created_at: + range: date + required: true + s3_modified_at: + range: date + required: true + intelligent_tiering_access: + range: string + required: true + is_delete_marker: + range: boolean + required: true + is_latest: + range: boolean + required: true + storage_class: + range: string + required: true + manifest_hash_value: + range: + required: false + file_hash_validation_status: + range: string + required: false + file_type: + range: string + required: true + size: + range: integer + required: true + staging_url: + range: string + required: true + release_url: + range: string + required: true + hash: + range: + required: true + access_type: + range: string + required: true + access_url: + range: string + required: true + acl: + range: string + required: true + multivalued: true + repository: + range: string + required: true + is_released: + range: boolean + required: true + is_registered: + range: boolean + required: true + experimental_strategy: + range: string + required: true + data_category: + range: string + required: true + description: high-level category of the data used for filtering + data_type: + range: string + required: true + FileAssay: + title: File Assay + # for now group all types into one table; but we may want to split out since different + # assay types collect different types of information + # this is a basic model + description: A file produced by or associated with an assay or data + acquisition process including omics, imaging, actigraphy, and other experimental or observational data. 
+ slots: + file_id: + range: string + required: true + identifier: true + subject_id: + range: string + required: true + sample_id: + range: string + required: true + data_category: + range: string + required: true + description: high-level category of the data used for filtering + experimental_strategy: + range: string + required: true + description: method or assay used to generate the data + data_type: + range: string + required: true + description: specific type of data contained in the file + format: + range: string + required: true + size: + range: integer + required: true + access_type: + range: string + required: true + assay_center: + range: string + required: false + description: the organization or center that generated the file + platform: + range: string + required: false + description: instrument or platform family name + workflow_name: + range: string + required: false + description: processing tool that produced the file + workflow_version: + range: string + required: false + slots: study_id: title: Study ID