include-dcc · Christina-J-Diaz · Mar 16, 2026 · Mar 16, 2026 · Mar 26, 2026 · Mar 26, 2026
diff --git a/src/include_access_model/schema/include_access_model.yaml b/src/include_access_model/schema/include_access_model.yaml
@@ -308,39 +308,105 @@ classes:
         identifier: true
   File:
     title: File
-    description: File
+    # Subset of info; ideally this pulls from a file universe table and some kind of assay table.
+    description: Required information for portal use.
     is_a: Record 
     slots: 
+      - study_id
       - file_id
+      # do we need both a subject and sample id in this table?
       - subject_id
       - sample_id
-      - filename
+      - s3_file_path  # can name this url if more appropriate
+      - file_name
       - format
       - data_category
       - data_type
-      - format
       - size
       #TODO: I'm not convinced this is the right strategy- access model vs operations
       - staging_url
       - release_url
       - drs_uri
       - hash
+      # The next three slots are constrained in slot_usage below, so they are listed explicitly.
+      - access_url
+      - access_type
+      - experimental_strategy
+      - is_released  # maybe can leave this out? should only release ready data be in this model?
+      - is_registered  # maybe can leave this out? should only drs registered data be in this model?
+      - repository
+
     slot_usage:
+      study_id:  # not marked identifier: a class may declare only one, and file_id is it
+        description: unique global study identifier file belongs to
+        range: string
+        required: true
       file_id:
+        description: unique file identifier assigned to a file
         range: string
         required: true
         identifier: true
       subject_id:
         multivalued: true
       sample_id:
         multivalued: true
+      file_name:
+        range: string
+        required: true
+      format:
+        range: string
+        required: true
+      size:
+        range: integer
+        required: true
+      s3_file_path:
+        description: full s3 url of a file's location in aws
+        range: string
+        required: true
+      hash:
+        # NOTE(review): empty `range:` removed; the range is inherited from the slot definition - confirm.
+        description: file hash value
+        required: true
+      access_url:
+        # NOTE(review): was declared twice (required: false, then true); kept the later value - confirm.
+        range: string
+        required: true
+      drs_uri:
+        range: string
+        required: false
+      data_category:
+        range: string
+        required: true
+      data_type:
+        range: string
+        required: true
+      experimental_strategy:
+        range: string
+        required: true
+      access_type:
+        range: string
+        required: true
+      is_released:
+        description: notes whether a file has been released to the public
+        range: boolean
+        required: true
+      is_registered:
+        description: notes whether a file has been registered to a drs service
+        range: boolean
+        required: true
+      repository:
+        description: name of drs service files are registered to
+        range: string
+        required: true
+
   FileHash:
     title: File Hash
     description: Type and value of a file content hash.
     slots:
       - hash_type
       - hash_value
+
   Dataset:
     title: Dataset
     description: Set of files grouped together for release.
     slots:
@@ -363,7 +429,230 @@ classes:
         multivalued: true
         description: The list of files comprising this dataset.
 
+  # Names are TBD and can change; the idea is that this is the operational or "file universe" model.
+  # It probably does not belong in the access model, but here is what could go there.
+  FileAdmin:
+    title: File Admin
+    description: File universe; contains all information about a file that may be needed for operational work
+    slots:
+      - study_id
+      - file_id
+      - subject_id  # do we need both a subject and sample id in this table?
+      - sample_id
+      - s3_file_path  # can name this url if more appropriate
+      - file_category
+      - size
+      - s3_key
+      - file_extension
+      - data_transfer_id
+      - aws_account_id
+      - account_name
+      - account_alias
+      - bucket_study_id
+      - bucket
+      - s3_created_at
+      - s3_modified_at
+      - intelligent_tiering_access
+      - is_delete_marker
+      - is_latest
+      - storage_class
+      - manifest_hash_value
+      - file_hash_validation_status
+      - file_type
+      - encryption_status
+      - is_multipart_uploaded
+      - object_lock_leval_hold_status  # NOTE(review): "leval" looks like a typo for "legal" - confirm against the slot definition
+      - object_lock_mode
+      - object_lock_retain_until_date
+      - replication_status
+      - version_id
+      - staging_url
+      - release_url
+      - hash
+      - access_type
+      - access_url
+      - drs_uri
+      - acl
+      - is_released
+      - is_registered
+      - repository
+      - experimental_strategy
+      - data_category
+      - data_type
+
+    slot_usage:
+      study_id:  # not marked identifier: a class may declare only one, and file_id is it
+        description: unique global study identifier file belongs to
+        range: string
+        required: true
+      file_id:
+        description: unique file identifier assigned to a file
+        range: string
+        required: true
+        identifier: true
+      s3_file_path:
+        description: full s3 url of a file's location in aws
+      file_category:
+        description: a high level classification of the file (e.g., omics file, imaging file)
+        range: string
+        required: true
+      s3_key:
+        range: string
+        required: true
+      file_extension:
+        range: string
+        required: true
+      data_transfer_id:
+        description: jira ticket number associated with a file transfer request to production bucket
+        range: string
+        required: false
+      aws_account_id:
+        range: string
+        required: true
+      account_name:
+        range: string
+        required: true
+      account_alias:
+        range: string
+        required: true
+      bucket_study_id:
+        description: global study ID used to create the bucket
+        range: string
+        required: false
+      bucket:
+        range: string
+        required: false
+      s3_created_at:
+        range: date
+        required: true
+      s3_modified_at:
+        range: date
+        required: true
+      intelligent_tiering_access:
+        range: string
+        required: true
+      is_delete_marker:
+        description: notes whether a file has been deleted from s3
+        range: boolean
+        required: true
+      is_latest:
+        range: boolean
+        required: true
+      storage_class:
+        range: string
+        required: true
+      manifest_hash_value:  # NOTE(review): empty `range:` removed; range is inherited from the slot definition - confirm
+        required: false
+      file_hash_validation_status:
+        range: string
+        required: false
+      file_type:
+        range: string
+        required: true
+      size:
+        range: integer
+        required: true
+      staging_url:
+        description: s3 location of a file before it's made public
+        range: string
+        required: true
+      release_url:
+        description: production location of a publicly available file
+        range: string
+        required: true
+      hash:  # NOTE(review): empty `range:` removed; range is inherited from the slot definition - confirm
+        description: file hash value
+        required: true
+      access_type:
+        description: notes whether a file is controlled, open, or registered-tier access
+        range: string
+        required: true
+      access_url:
+        range: string
+        required: true
+      acl:
+        description: access control list for the file
+        range: string
+        required: true
+        multivalued: true
+      repository:
+        description: name of drs service files are registered to
+        range: string
+        required: true
+      is_released:
+        description: notes whether a file has been released to the public
+        range: boolean
+        required: true
+      is_registered:
+        description: notes whether a file has been registered to a drs service
+        range: boolean
+        required: true
+      experimental_strategy:
+        range: string
+        required: true
+      data_category:
+        range: string
+        required: true
+        description: high-level category of the data used for filtering
+      data_type:
+        range: string
+        required: true
 
+  FileAssay:
+    title: File Assay
+    # Basic model: for now all assay types share one table, but we may want to split it out later
+    # since different assay types collect different types of information.
+    description: >-
+      A file produced by or associated with an assay or data acquisition process
+      including omics, imaging, actigraphy, and other experimental or observational data.
+    attributes:  # inline slot definitions go under `attributes`; `slots` only takes a list of slot names
+      file_id:
+        range: string
+        required: true
+        identifier: true
+      subject_id:
+        range: string
+        required: true
+      sample_id:
+        range: string
+        required: true
+      data_category:
+        range: string
+        required: true
+        description: high-level category of the data used for filtering
+      experimental_strategy:
+        range: string
+        required: true
+        description: method or assay used to generate the data
+      data_type:
+        range: string
+        required: true
+        description: specific type of data contained in the file
+      format:
+        range: string
+        required: true
+      size:
+        range: integer
+        required: true
+      access_type:
+        range: string
+        required: true
+      assay_center:
+        range: string
+        required: false
+        description: the organization or center that generated the file
+      platform:
+        range: string
+        required: false
+        description: instrument or platform family name
+      workflow_name:
+        range: string
+        required: false
+        description: processing tool that produced the file
+      workflow_version:
+        range: string
+        required: false
+
 slots: 
   study_id:
     title: Study ID