Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
307 changes: 298 additions & 9 deletions src/include_access_model/schema/include_access_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -308,39 +308,105 @@ classes:
identifier: true
# File: portal-facing file record; inherits from Record via `is_a`.
# NOTE(review): this span looks like a diff view with removed and added lines
# interleaved, so several keys and list entries appear more than once
# (`description`, `file_id`, `format`, `size`, `data_category`, `hash`, and both
# `filename` and `file_name`). Confirm the intended final list against the real
# file; duplicate mapping keys are invalid YAML.
File:
title: File
description: File
# subset of info; ideally pulls from a file universe table and some kind of assay table
description: Required information for portal use.
is_a: Record
# NOTE(review): the entries below are slot names. The `description:` lines that
# follow some entries are not attached to those list items as written;
# presumably they belong under the matching `slot_usage` entry - TODO confirm.
slots:
- file_id
- subject_id
- study_id
description: unique global study identifier file belongs to
- file_id
description: unique file identifier assigned to a file
- subject_id # do we need both a subject and sample id in this table?
- sample_id
- filename
- s3_file_path # can name this url if more appropriate
description: full s3 url of an file's location in aws
- file_name
- size
- format
- data_category
- data_category
- data_type
- format
- size
#TODO: I'm not convinced this is the right strategy- access model vs operations
#TODO: I'm not convinced this is the right strategy- access model vs operations
# NOTE(review): staging_url and release_url have no entry in this class's
# slot_usage; confirm whether they are still meant to be part of File.
- staging_url
- release_url
- drs_uri
- hash
- is_released # maybe can leave this out? should only release ready data be in this model?
description: notes whether a file has been released to the public
- is_registered # maybe can leave this out? should only drs registered data be in this model?
description: notes whether a file has been registered to a drs service
- repository
description: name of drs service files are registered to
- hash
description: file hash value

slot_usage:
  # Per-class refinements (range / required / cardinality) of the slots used
  # by File.
  # NOTE(review): access_url, access_type and experimental_strategy are
  # refined here but do not appear in the visible `slots` list of this class;
  # confirm they are listed there (or inherited) so these refinements apply.
  study_id:
    # Not an identifier: a class may carry at most one identifier slot, and
    # file_id is the per-file unique value.
    range: string
    required: true
  file_id:
    range: string
    required: true
    identifier: true
  subject_id:
    multivalued: true
  sample_id:
    multivalued: true
  file_name:
    range: string
    required: true
  format:
    range: string
    required: true
  size:
    range: integer
    required: true
  s3_file_path:
    range: string
    required: true
  hash:
    # No range override: the original had an empty `range:` (null), so the
    # slot keeps whatever range its own definition declares.
    required: true
  drs_uri:
    range: string
    required: false
  data_category:
    range: string
    required: true
  data_type:
    range: string
    required: true
  experimental_strategy:
    range: string
    required: true
  access_type:
    range: string
    required: true
  is_released:
    range: boolean
    required: true
  is_registered:
    range: boolean
    required: true
  repository:
    range: string
    required: true
  access_url:
    # Was declared twice (required: false, then required: true). Duplicate
    # keys are invalid; the later declaration is the one kept here.
    range: string
    required: true

FileHash:
  # A file content hash, modelled as two slots: hash_type and hash_value.
  title: 'File Hash'
  description: 'Type and value of a file content hash.'
  slots:
    - hash_type
    - hash_value
Dataset:

Dataset:
title: Dataset
description: Set of files grouped together for release.
slots:
Expand All @@ -363,7 +429,230 @@ classes:
multivalued: true
description: The list of files comprising this dataset.

FileAdmin:
  # Operational / "file universe" model. Names are TBD and can change.
  # Probably does not belong in the access model itself; this records what
  # could go into an operational model instead.
  title: File Admin
  description: >-
    File universe; contains all information about a file that may be needed
    for operational work
  # Slot names only. Per-slot descriptions and constraints live in slot_usage,
  # because a `description:` key cannot hang off a list item.
  slots:
    - study_id
    - file_id
    - subject_id  # do we need both a subject and sample id in this table?
    - sample_id
    - s3_file_path  # can name this url if more appropriate
    - file_category
    - size
    - s3_key
    - file_extension
    - data_transfer_id
    - aws_account_id
    - account_name
    - account_alias
    - bucket_study_id
    - bucket
    - s3_created_at
    - s3_modified_at
    - intelligent_tiering_access
    - is_delete_marker
    - is_latest
    - storage_class
    - manifest_hash_value
    - file_hash_validation_status
    - file_type
    - encryption_status
    - is_multipart_uploaded
    # NOTE(review): "leval" is presumably a typo for "legal"; the name is kept
    # unchanged here because the slot's own definition is not visible.
    - object_lock_leval_hold_status
    - object_lock_mode
    - object_lock_retain_until_date
    - replication_status
    - version_id
    - staging_url
    - release_url
    - hash
    - access_type
    - access_url
    - drs_uri
    - acl
    - is_released
    - is_registered
    - repository
    # Spelled to match the slot_usage entry below (was `experiment_strategy`).
    - experimental_strategy
    # Listed because slot_usage below marks both as required.
    - data_category
    - data_type

  slot_usage:
    study_id:
      # Not an identifier: a class may carry at most one identifier slot, and
      # file_id is the per-file unique value.
      description: unique global study identifier file belongs to
      range: string
      required: true
    file_id:
      description: unique file identifier assigned to a file
      range: string
      required: true
      identifier: true
    s3_file_path:
      description: full s3 url of a file's location in aws
    file_category:
      description: a high level classification of the file (e.g., omics file, imaging file)
      range: string
      required: true
    s3_key:
      range: string
      required: true
    file_extension:
      range: string
      required: true
    data_transfer_id:
      description: jira ticket number associated with a file transfer request to production bucket
      range: string
      required: false
    aws_account_id:
      range: string
      required: true
    account_name:
      range: string
      required: true
    account_alias:
      range: string
      required: true
    bucket_study_id:
      description: global study ID used to create the bucket
      range: string
      required: false
    bucket:
      range: string
      required: false
    s3_created_at:
      range: date
      required: true
    s3_modified_at:
      range: date
      required: true
    intelligent_tiering_access:
      range: string
      required: true
    is_delete_marker:
      description: notes whether a file has been deleted from s3
      range: boolean
      required: true
    is_latest:
      range: boolean
      required: true
    storage_class:
      range: string
      required: true
    manifest_hash_value:
      # No range override: the original had an empty `range:` (null).
      required: false
    file_hash_validation_status:
      range: string
      required: false
    file_type:
      range: string
      required: true
    size:
      range: integer
      required: true
    staging_url:
      description: s3 location of a file before it is made public
      range: string
      required: true
    release_url:
      description: production location of a publicly available file
      range: string
      required: true
    hash:
      description: file hash value
      # No range override: the original had an empty `range:` (null), so the
      # slot keeps whatever range its own definition declares.
      required: true
    access_type:
      description: notes whether a file is controlled, open, or registered-tier access
      range: string
      required: true
    access_url:
      # The original description was left empty; add one when the meaning is
      # settled.
      range: string
      required: true
    acl:
      description: access control list for the file
      range: string
      required: true
      multivalued: true
    repository:
      description: name of drs service files are registered to
      range: string
      required: true
    is_released:
      description: notes whether a file has been released to the public
      range: boolean
      required: true
    is_registered:
      description: notes whether a file has been registered to a drs service
      range: boolean
      required: true
    experimental_strategy:
      range: string
      required: true
    data_category:
      range: string
      required: true
      description: high-level category of the data used for filtering
    data_type:
      range: string
      required: true

FileAssay:
  # For now all assay types are grouped into one table; this may be split
  # later, since different assay types collect different information.
  # This is a basic model.
  title: File Assay
  description: >-
    A file produced by or associated with an assay or data
    acquisition process including omics, imaging, actigraphy, and other
    experimental or observational data.
  # Slot names only, matching how the sibling File classes declare slots;
  # ranges, required flags and descriptions are in slot_usage.
  # NOTE(review): assay_center, platform, workflow_name and workflow_version
  # must exist in the schema's top-level `slots:` section (not visible here);
  # if they are meant to be local to this class, move them under `attributes:`.
  slots:
    - file_id
    - subject_id
    - sample_id
    - data_category
    - experimental_strategy
    - data_type
    - format
    - size
    - access_type
    - assay_center
    - platform
    - workflow_name
    - workflow_version

  slot_usage:
    file_id:
      range: string
      required: true
      identifier: true
    subject_id:
      range: string
      required: true
    sample_id:
      range: string
      required: true
    data_category:
      range: string
      required: true
      description: high-level category of the data used for filtering
    experimental_strategy:
      range: string
      required: true
      description: method or assay used to generate the data
    data_type:
      range: string
      required: true
      description: specific type of data contained in the file
    format:
      range: string
      required: true
    size:
      range: integer
      required: true
    access_type:
      range: string
      required: true
    assay_center:
      range: string
      required: false
      description: the organization or center that generated the file
    platform:
      range: string
      required: false
      description: instrument or platform family name
    workflow_name:
      range: string
      required: false
      description: processing tool that produced the file
    workflow_version:
      range: string
      required: false

slots:
study_id:
title: Study ID
Expand Down
Loading